From 096e09d83f113767ec3f85bb03a1f7d24c612e64 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sat, 20 Jul 2024 00:33:56 +0530 Subject: [PATCH 01/36] Add luma_meter and tonemapper --- .../nbl/builtin/hlsl/luma_meter/luma_meter.hlsl | 16 ++++++++++++++++ .../nbl/builtin/hlsl/tonemapper/operators.hlsl | 16 ++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 4 ++++ 3 files changed, 36 insertions(+) create mode 100644 include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl create mode 100644 include/nbl/builtin/hlsl/tonemapper/operators.hlsl diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl new file mode 100644 index 0000000000..4e18655852 --- /dev/null +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -0,0 +1,16 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ +#define _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ + +namespace nbl +{ +namespace hls +{ + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl new file mode 100644 index 0000000000..5ebb5b2ffa --- /dev/null +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -0,0 +1,16 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_TONE_MAPPER_OPERATORS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TONE_MAPPER_OPERATORS_INCLUDED_ + +namespace nbl +{ +namespace hls +{ + +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 8f797b9454..9dd9ddfd42 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -34,6 +34,10 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/barycentric/utils.glsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/__ref.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/__ptr.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/bda_accessor.hlsl") +# luma metering +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/luma_meter/luma_meter.hlsl") +# tonemapper +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tonemapper/operators.hlsl") # bump mapping LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/bump_mapping/fragment.glsl") # TODO: rename to `frag.glsl` LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/bump_mapping/utils.glsl") From 4fd700fe69709ec127f7f42ec09b4f7f4ce0260c Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sat, 20 Jul 2024 00:34:17 +0530 Subject: [PATCH 02/36] Update submodule pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c6d5ee3498..87d4794dcc 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c6d5ee349859ce0b5229bc62a2372fa1d4b6b17c +Subproject commit 87d4794dcc5de8264528292c4a30b5284979754a From 52e7ab24dedb16f6c94855d6f0037e7ea77fba81 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 1 Aug 2024 21:20:52 +0530 Subject: [PATCH 03/36] Convert morton.h to hlsl --- include/nbl/asset/utils/IMeshPacker.h | 2 +- include/nbl/asset/utils/IVirtualTexture.h | 3 +- include/nbl/builtin/hlsl/math/morton.hlsl | 283 ++++++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 2 + 4 files changed, 288 insertions(+), 2 deletions(-) create mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl diff --git a/include/nbl/asset/utils/IMeshPacker.h b/include/nbl/asset/utils/IMeshPacker.h index 3f09062b18..355d792782 100644 --- a/include/nbl/asset/utils/IMeshPacker.h +++ b/include/nbl/asset/utils/IMeshPacker.h @@ -6,7 +6,7 @@ #define __NBL_ASSET_I_MESH_PACKER_H_INCLUDED__ #include "nbl/asset/utils/IMeshManipulator.h" -#include "nbl/core/math/morton.h" +#include "nbl/builtin/hlsl/math/morton.hlsl" namespace nbl { diff --git a/include/nbl/asset/utils/IVirtualTexture.h b/include/nbl/asset/utils/IVirtualTexture.h index ec26f56103..64ea49cbe7 100644 --- a/include/nbl/asset/utils/IVirtualTexture.h +++ b/include/nbl/asset/utils/IVirtualTexture.h @@ -7,7 +7,6 @@ #include -#include "nbl/core/math/morton.h" #include "nbl/core/memory/memory.h" #include "nbl/core/alloc/GeneralpurposeAddressAllocator.h" #include "nbl/core/alloc/PoolAddressAllocator.h" @@ -19,6 +18,8 @@ #include "nbl/asset/filters/CPaddedCopyImageFilter.h" #include "nbl/asset/filters/CFillImageFilter.h" +#include "nbl/builtin/hlsl/math/morton.hlsl" + namespace nbl::asset { diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl new file mode 100644 index 0000000000..64b0b66cb7 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -0,0 +1,283 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ + +#ifdef __HLSL_VERSION +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#else +#include +#endif + +namespace nbl +{ +namespace core +{ + +namespace impl +{ + +#ifdef __HLSL_VERSION +template +T morton2d_mask(uint16_t _n) const +{ + const static uint64_t mask[5] = + { + 0x5555555555555555ull, + 0x3333333333333333ull, + 0x0F0F0F0F0F0F0F0Full, + 0x00FF00FF00FF00FFull, + 0x0000FFFF0000FFFFull + }; + return static_cast(mask[_n]); +} + +template +T morton3d_mask(uint16_t _n) const +{ + const static uint64_t mask[5] = + { + 0x1249249249249249ull, + 0x10C30C30C30C30C3ull, + 0x010F00F00F00F00Full, + 0x001F0000FF0000FFull, + 0x001F00000000FFFFull + }; + return static_cast(mask[_n]); +} +template +T morton4d_mask(uint16_t _n) const +{ + const static uint64_t mask[4] = + { + 0x1111111111111111ull, + 0x0303030303030303ull, + 0x000F000F000F000Full, + 0x000000FF000000FFull + }; + return static_cast(mask[_n]); +} + +template +inline T morton2d_decode(T x) +{ + x = x & morton2d_mask(0); + x = (x | (x >> 1)) & morton2d_mask(1); + x = (x | (x >> 2)) & morton2d_mask(2); + if (bitDepth > 8u) + { + x = (x | (x >> 4)) & morton2d_mask(3); + } + if (bitDepth > 16u) + { + x = (x | (x >> 8)) & morton2d_mask(4); + } + if (bitDepth > 32u) + { + x = (x | (x >> 16)); + } + return x; +} + +//! Puts bits on even positions filling gaps with 0s +template +inline T separate_bits_2d(T x) +{ + if (bitDepth > 32u) + { + x = (x | (x << 16)) & morton2d_mask(4); + } + if (bitDepth > 16u) + { + x = (x | (x << 8)) & morton2d_mask(3); + } + if (bitDepth > 8u) + { + x = (x | (x << 4)) & morton2d_mask(2); + } + x = (x | (x << 2)) & morton2d_mask(1); + x = (x | (x << 1)) & morton2d_mask(0); + + return x; +} +template +inline T separate_bits_3d(T x) +{ + if (bitDepth > 32u) + { + x = (x | (x << 32)) & morton3d_mask(4); + } + if (bitDepth > 16u) + { + x = (x | (x << 16)) & morton3d_mask(3); + } + if (bitDepth > 8u) + { + x = (x | (x << 8)) & morton3d_mask(2); + } + x = (x | (x << 4)) & morton3d_mask(1); + x = (x | (x << 2)) & morton3d_mask(0); + + return x; +} +template +inline T separate_bits_4d(T x) +{ + if (bitDepth > 32u) + { + x = (x | (x << 24)) & morton4d_mask(3); + } + if (bitDepth > 16u) + { + x = (x | (x << 12)) & morton4d_mask(2); + } + if (bitDepth > 8u) + { + x = (x | (x << 6)) & morton4d_mask(1); + } + x = (x | (x << 3)) & morton4d_mask(0); + + return x; +} +#else +template +constexpr T morton2d_mask(uint8_t _n) +{ + constexpr uint64_t mask[5] = + { + 0x5555555555555555ull, + 0x3333333333333333ull, + 0x0F0F0F0F0F0F0F0Full, + 0x00FF00FF00FF00FFull, + 0x0000FFFF0000FFFFull + }; + return static_cast(mask[_n]); +} +template +constexpr T morton3d_mask(uint8_t _n) +{ + constexpr uint64_t mask[5] = + { + 0x1249249249249249ull, + 0x10C30C30C30C30C3ull, + 0x010F00F00F00F00Full, + 0x001F0000FF0000FFull, + 0x001F00000000FFFFull + }; + return static_cast(mask[_n]); +} +template +constexpr T morton4d_mask(uint8_t _n) +{ + constexpr uint64_t mask[4] = + { + 0x1111111111111111ull, + 0x0303030303030303ull, + 0x000F000F000F000Full, + 0x000000FF000000FFull + }; + return static_cast(mask[_n]); +} + +template +inline T morton2d_decode(T x) +{ + x = x & morton2d_mask(0); + x = (x | (x >> 1)) & morton2d_mask(1); + x = (x | (x >> 2)) & morton2d_mask(2); + if constexpr (bitDepth > 8u) + { + x = (x | (x >> 4)) & morton2d_mask(3); + } + if constexpr (bitDepth > 16u) + { + x = (x | (x >> 8)) & morton2d_mask(4); + } + if constexpr (bitDepth > 32u) + { + x = (x | (x >> 16)); + } + return x; +} + +//! Puts bits on even positions filling gaps with 0s +template +inline T separate_bits_2d(T x) +{ + if constexpr (bitDepth > 32u) + { + x = (x | (x << 16)) & morton2d_mask(4); + } + if constexpr (bitDepth > 16u) + { + x = (x | (x << 8)) & morton2d_mask(3); + } + if constexpr (bitDepth > 8u) + { + x = (x | (x << 4)) & morton2d_mask(2); + } + x = (x | (x << 2)) & morton2d_mask(1); + x = (x | (x << 1)) & morton2d_mask(0); + + return x; +} +template +inline T separate_bits_3d(T x) +{ + if constexpr (bitDepth > 32u) + { + x = (x | (x << 32)) & morton3d_mask(4); + } + if constexpr (bitDepth > 16u) + { + x = (x | (x << 16)) & morton3d_mask(3); + } + if constexpr (bitDepth > 8u) + { + x = (x | (x << 8)) & morton3d_mask(2); + } + x = (x | (x << 4)) & morton3d_mask(1); + x = (x | (x << 2)) & morton3d_mask(0); + + return x; +} +template +inline T separate_bits_4d(T x) +{ + if constexpr (bitDepth > 32u) + { + x = (x | (x << 24)) & morton4d_mask(3); + } + if constexpr (bitDepth > 16u) + { + x = (x | (x << 12)) & morton4d_mask(2); + } + if constexpr (bitDepth > 8u) + { + x = (x | (x << 6)) & morton4d_mask(1); + } + x = (x | (x << 3)) & morton4d_mask(0); + + return x; +} +#endif +} + +template +T morton2d_decode_x(T _morton) { return impl::morton2d_decode(_morton); } +template +T morton2d_decode_y(T _morton) { return impl::morton2d_decode(_morton >> 1); } + +template +T morton2d_encode(T x, T y) { return impl::separate_bits_2d(x) | (impl::separate_bits_2d(y) << 1); } +template +T morton3d_encode(T x, T y, T z) { return impl::separate_bits_3d(x) | (impl::separate_bits_3d(y) << 1) | (impl::separate_bits_3d(z) << 2); } +template +T morton4d_encode(T x, T y, T z, T w) { return impl::separate_bits_4d(x) | (impl::separate_bits_4d(y) << 1) | (impl::separate_bits_4d(z) << 2) | (impl::separate_bits_4d(w) << 3); } + +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 8a7775c7a5..df61293d4a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -281,6 +281,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") #extra math LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/impl.hlsl") +#morton +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") #acceleration structures LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/acceleration_structures.hlsl") #colorspace From 1cc26bdcd583bbbc354c8c5e951f06e6cb1d3f28 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 2 Aug 2024 19:00:47 +0530 Subject: [PATCH 04/36] Fix HLSL morton code --- include/nbl/builtin/hlsl/math/morton.hlsl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 64b0b66cb7..4150af637a 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -21,7 +21,7 @@ namespace impl #ifdef __HLSL_VERSION template -T morton2d_mask(uint16_t _n) const +T morton2d_mask(uint16_t _n) { const static uint64_t mask[5] = { @@ -31,11 +31,11 @@ T morton2d_mask(uint16_t _n) const 0x00FF00FF00FF00FFull, 0x0000FFFF0000FFFFull }; - return static_cast(mask[_n]); + return mask[_n]; } template -T morton3d_mask(uint16_t _n) const +T morton3d_mask(uint16_t _n) { const static uint64_t mask[5] = { @@ -45,10 +45,10 @@ T morton3d_mask(uint16_t _n) const 0x001F0000FF0000FFull, 0x001F00000000FFFFull }; - return static_cast(mask[_n]); + return mask[_n]; } template -T morton4d_mask(uint16_t _n) const +T morton4d_mask(uint16_t _n) { const static uint64_t mask[4] = { @@ -57,7 +57,7 @@ T morton4d_mask(uint16_t _n) const 0x000F000F000F000Full, 0x000000FF000000FFull }; - return static_cast(mask[_n]); + return mask[_n]; } template From 6922d0c41b509a125be89d86627ba206d565b053 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Mon, 5 Aug 2024 19:02:04 +0530 Subject: [PATCH 05/36] Create geom_luma_meter and computeLuma --- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 4e18655852..d2c33602c8 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -5,11 +5,56 @@ #ifndef _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ #define _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/math/morton.hlsl" +#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" + namespace nbl { -namespace hls +namespace hlsl +{ +namespace luma_meter +{ + +struct LumaMeteringWindow { + float32_t2 meteringWindowScale; + float32_t2 meteringWindowOffset; +}; + +template +struct geom_luma_meter { + using this_t = geom_luma_meter; + + static this_t create(NBL_REF_ARG(LumaMeteringWindow) window) + { + this_t retval; + retval.window = window; + return retval; + } + float32_t computeLuma(NBL_REF_ARG(TexAccessor) tex, uint32_t2 sampleCount, uint32_t2 sampleIndex, float32_t2 viewportSize) + { + float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f)); + float32_t2 samplePos = stride * sampleIndex; + float32_t2 uvPos = (samplePos + float32_t2(0.5f, 0.5f)) / viewportSize; + float32_t3 color = colorspace::eotf::sRGB(tex.get(uvPos)); + float32_t luma = dot(colorspace::sRGBtoXYZ[1], color); + + const float32_t minLuma = 1.0 / 4096.0; + const float32_t maxLuma = 32768.0; + + luma = clamp(luma, minLuma, maxLuma); + + return log2(luma / minLuma) / log2(maxLuma / minLuma); + } + + LumaMeteringWindow window; +}; +} } } From 603a92f87a5831dc491ff4e4b53e99f5af9a57ce Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 7 Aug 2024 19:22:52 +0530 Subject: [PATCH 06/36] Add gatherLuma method --- include/nbl/asset/utils/IVirtualTexture.h | 4 +- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 54 +++++++++++++++++-- include/nbl/builtin/hlsl/math/morton.hlsl | 2 +- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/include/nbl/asset/utils/IVirtualTexture.h b/include/nbl/asset/utils/IVirtualTexture.h index 64ea49cbe7..b715c40cfc 100644 --- a/include/nbl/asset/utils/IVirtualTexture.h +++ b/include/nbl/asset/utils/IVirtualTexture.h @@ -922,7 +922,7 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa storage->incrTileCounter(neededPhysPages); return offsetToTextureData( - page_tab_offset_t(core::morton2d_decode_x(addr), core::morton2d_decode_y(addr), pgtLayer), + page_tab_offset_t(hlsl::morton2d_decode_x(addr), hlsl::morton2d_decode_y(addr), pgtLayer), extent, _subres.levelCount, _wrapu, @@ -934,7 +934,7 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa { uint32_t sz = computeSquareSz(_addr.origsize_x, _addr.origsize_y); sz *= sz; - const uint32_t addr = core::morton2d_encode(_addr.pgTab_x, _addr.pgTab_y); + const uint32_t addr = hlsl::morton2d_encode(_addr.pgTab_x, _addr.pgTab_y); core::address_allocator_traits::multi_free_addr(m_pageTableLayerAllocators[_addr.pgTab_layer], 1u, &addr, &sz); diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index d2c33602c8..7ed9604c4f 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -7,6 +7,9 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" +#include "nbl/builtin/hlsl/workgroup/basic.hlsl" +#include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl" #include "nbl/builtin/hlsl/type_traits.hlsl" #include "nbl/builtin/hlsl/math/morton.hlsl" #include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" @@ -25,9 +28,9 @@ struct LumaMeteringWindow float32_t2 meteringWindowOffset; }; -template +template struct geom_luma_meter { - using this_t = geom_luma_meter; + using this_t = geom_luma_meter; static this_t create(NBL_REF_ARG(LumaMeteringWindow) window) { @@ -36,7 +39,18 @@ struct geom_luma_meter { return retval; } - float32_t computeLuma(NBL_REF_ARG(TexAccessor) tex, uint32_t2 sampleCount, uint32_t2 sampleIndex, float32_t2 viewportSize) + float32_t reduction(float32_t value, NBL_REF_ARG(SharedAccessor) sdata) + { + return workgroup::reduction < plus < float32_t >, GroupSize >:: + template __call (value, sdata); + } + + float32_t computeLuma( + NBL_REF_ARG(TexAccessor) tex, + uint32_t2 sampleCount, + uint32_t2 sampleIndex, + float32_t2 viewportSize + ) { float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f)); float32_t2 samplePos = stride * sampleIndex; @@ -52,6 +66,40 @@ struct geom_luma_meter { return log2(luma / minLuma) / log2(maxLuma / minLuma); } + void gatherLuma( + NBL_REF_ARG(ValueAccessor) val, + NBL_REF_ARG(TexAccessor) tex, + NBL_REF_ARG(SharedAccessor) sdata, + uint32_t2 sampleCount, + float32_t2 viewportSize + ) { + uint32_t2 coord = { + morton2d_decode_x(glsl::gl_LocalInvocationIndex()), + morton2d_decode_y(glsl::gl_LocalInvocationIndex()) + }; + uint32_t tid = workgroup::SubgroupContiguousIndex(); + + uint32_t2 sampleIndex = coord * GroupSize + float32_t2(glsl::gl_SubgroupID() + 1, glsl::gl_SubgroupInvocationID() + 1); + float32_t luma = 0.0f; + + if (sampleIndex.x <= sampleCount.x && sampleIndex.y <= sampleCount.y) { + luma = computeLuma(tex, sampleCount, sampleIndex, viewportSize); + float32_t lumaSum = reduction(luma, sdata); + + sdata.workgroupExecutionAndMemoryBarrier(); + + if (tid == GroupSize - 1) { + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + uint32_t lumaSumBitPattern = uint32_t(clamp(lumaSum, 0.f, float((1 << fixedPointBitsLeft) - 1))); + uint32_t3 workgroupSize = glsl::gl_WorkGroupSize(); + uint32_t workgroupIndex = dot(uint32_t3(workgroupSize.y * workgroupSize.z, workgroupSize.z, 1), glsl::gl_WorkGroupID()); + + val.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); + } + } + } + LumaMeteringWindow window; }; } diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 4150af637a..1f35016cb6 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,7 +13,7 @@ namespace nbl { -namespace core +namespace hlsl { namespace impl From 810a6ac1cc2ff6662dca36edd0413288b4f1b1ea Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 8 Aug 2024 16:29:20 +0530 Subject: [PATCH 07/36] Add getGatheredLuma() --- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 7ed9604c4f..21bd813439 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -32,10 +32,12 @@ template; - static this_t create(NBL_REF_ARG(LumaMeteringWindow) window) + static this_t create(NBL_REF_ARG(LumaMeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum) { this_t retval; retval.window = window; + retval.minLuma = lumaMinimum; + retval.maxLuma = lumaMaximum; return retval; } @@ -58,9 +60,6 @@ struct geom_luma_meter { float32_t3 color = colorspace::eotf::sRGB(tex.get(uvPos)); float32_t luma = dot(colorspace::sRGBtoXYZ[1], color); - const float32_t minLuma = 1.0 / 4096.0; - const float32_t maxLuma = 32768.0; - luma = clamp(luma, minLuma, maxLuma); return log2(luma / minLuma) / log2(maxLuma / minLuma); @@ -72,7 +71,8 @@ struct geom_luma_meter { NBL_REF_ARG(SharedAccessor) sdata, uint32_t2 sampleCount, float32_t2 viewportSize - ) { + ) + { uint32_t2 coord = { morton2d_decode_x(glsl::gl_LocalInvocationIndex()), morton2d_decode_y(glsl::gl_LocalInvocationIndex()) @@ -91,7 +91,9 @@ struct geom_luma_meter { if (tid == GroupSize - 1) { uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); - uint32_t lumaSumBitPattern = uint32_t(clamp(lumaSum, 0.f, float((1 << fixedPointBitsLeft) - 1))); + + uint32_t lumaSumBitPattern = uint32_t(clamp((lumaSum - log2(minLuma)) * (log2(maxLuma) - log2(minLuma)), 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); + uint32_t3 workgroupSize = glsl::gl_WorkGroupSize(); uint32_t workgroupIndex = dot(uint32_t3(workgroupSize.y * workgroupSize.z, workgroupSize.z, 1), glsl::gl_WorkGroupID()); @@ -100,7 +102,18 @@ struct geom_luma_meter { } } + float32_t getGatheredLuma( + NBL_REF_ARG(ValueAccessor) val, + uint32_t2 sampleCount + ) + { + uint32_t lumaSumBitPattern = val.get(glsl::gl_SubgroupInvocationID()); + float32_t lumaSumValue = float32_t(lumaSumBitPattern) / (log2(maxLuma) - log2(minLuma)) + log2(minLuma); + return glsl::subgroupAdd(lumaSumValue) / (sampleCount.x * sampleCount.y); + } + LumaMeteringWindow window; + float32_t minLuma, maxLuma; }; } } From 69a73c1d90a0702894ecead0de1455d459d8b2ca Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 8 Aug 2024 16:59:59 +0530 Subject: [PATCH 08/36] Add reinhard and aces hlsl operators --- .../builtin/hlsl/tonemapper/operators.hlsl | 59 ++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl index 5ebb5b2ffa..cc5728e9ff 100644 --- a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -5,10 +5,67 @@ #ifndef _NBL_BUILTIN_HLSL_TONE_MAPPER_OPERATORS_INCLUDED_ #define _NBL_BUILTIN_HLSL_TONE_MAPPER_OPERATORS_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + namespace nbl { -namespace hls +namespace hlsl +{ + +struct ReinhardParams +{ + float32_t keyAndManualLinearExposure; + float32_t rcpWhite2; +}; + +struct ACESParams +{ + float32_t gamma; // 1.0 + float32_t exposure; // actualExposure+midGrayLog2 +}; + + +float32_t3 reinhard(ReinhardParams params, float32_t3 rawCIEXYZcolor) +{ + float32_t exposureFactors = params.keyAndManualLinearExposure; + float32_t exposedLuma = rawCIEXYZcolor.y * exposureFactors; + float32_t colorMultiplier = (exposureFactors * (1.0 + exposedLuma * params.rcpWhite2) / (1.0 + exposedLuma)); + return rawCIEXYZcolor * colorMultiplier; +} + +float32_t3 aces(ACESParams params, float32_t3 rawCIEXYZcolor) { + float32_t3 tonemapped = rawCIEXYZcolor; + if (tonemapped.y > 1.175494351e-38) + tonemapped *= exp2(log2(tonemapped.y) * (params.gamma - 1.0) + (params.exposure) * params.gamma); + + // XYZ => RRT_SAT + // this seems to be a matrix for some hybrid colorspace, coefficients are somewhere inbetween BT2020 and ACEScc(t) + const float32_t3x3 XYZ_RRT_Input = float32_t3x3( + float32_t3(1.594168310, -0.262608051, -0.231993079), + float32_t3(-0.6332771780, 1.5840380200, 0.0164147373), + float32_t3(0.00892840419, 0.03648501260, 0.87711471300) + ); + + // this is obviously fitted to some particular simulated sensor/film and display + float32_t3 v = mul(XYZ_RRT_Input, tonemapped); + float32_t3 a = v * (v + float32_t3(0.0245786)) - float32_t3(0.000090537); + float32_t3 b = v * (v * float32_t(0.983729) + float32_t3(0.4329510)) + float32_t3(0.238081); + v = a / b; + + // ODT_SAT => XYZ + // this seems to be a matrix for some hybrid colorspace, coefficients are similar to AdobeRGB,BT2020 and ACEScc(t) + const float32_t3x3 ODT_XYZ_Output = float32_t3x3( + float32_t3(0.624798000, 0.164064825, 0.161605373), + float32_t3(0.268048108, 0.674283803, 0.057667464), + float32_t3(0.0157514643, 0.0526682511, 1.0204007600) + ); + return mul(ODT_XYZ_Output, v); +} + +// ideas for more operators https://web.archive.org/web/20191226154550/http://cs.columbia.edu/CAVE/software/softlib/dorf.php +// or get proper ACES RRT and ODTs +// https://partnerhelp.netflixstudios.com/hc/en-us/articles/360000622487-I-m-using-ACES-Which-Output-Transform-should-I-use- } } From 4c70cf5bb919abab9c82e36320de45be88fe02ee Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:47:49 +0530 Subject: [PATCH 09/36] cast mask values to correct type --- include/nbl/builtin/hlsl/math/morton.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 1f35016cb6..1cd2105dc5 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -31,7 +31,7 @@ T morton2d_mask(uint16_t _n) 0x00FF00FF00FF00FFull, 0x0000FFFF0000FFFFull }; - return mask[_n]; + return (T)mask[_n]; } template @@ -45,7 +45,7 @@ T morton3d_mask(uint16_t _n) 0x001F0000FF0000FFull, 0x001F00000000FFFFull }; - return mask[_n]; + return (T)mask[_n]; } template T morton4d_mask(uint16_t _n) @@ -57,7 +57,7 @@ T morton4d_mask(uint16_t _n) 0x000F000F000F000Full, 0x000000FF000000FFull }; - return mask[_n]; + return (T)mask[_n]; } template From d9d6dd8c19a1c896ea03dce1182791bfb2e1834b Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:35:19 +0530 Subject: [PATCH 10/36] Add create methods to tonemapper params --- .../builtin/hlsl/tonemapper/operators.hlsl | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl index cc5728e9ff..daff652bbd 100644 --- a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -11,15 +11,34 @@ namespace nbl { namespace hlsl { +namespace tonemapper +{ struct ReinhardParams { + using this_t = ReinhardParams; + static this_t create(float EV, float key = 0.18f, float WhitePointRelToEV = 16.f) + { + this_t retval; + retval.keyAndManualLinearExposure = key * exp2(EV); + retval.rcpWhite2 = 1.f / (WhitePointRelToEV * WhitePointRelToEV); + return retval; + } + float32_t keyAndManualLinearExposure; float32_t rcpWhite2; }; struct ACESParams { + using this_t = ACESParams; + static this_t create(float EV, float key = 0.18f, float Contrast = 1.f) { + this_t retval; + retval.gamma = Contrast; + retval.exposure = EV + log2(key * 0.77321666f); + return retval; + } + float32_t gamma; // 1.0 float32_t exposure; // actualExposure+midGrayLog2 }; @@ -49,8 +68,8 @@ float32_t3 aces(ACESParams params, float32_t3 rawCIEXYZcolor) // this is obviously fitted to some particular simulated sensor/film and display float32_t3 v = mul(XYZ_RRT_Input, tonemapped); - float32_t3 a = v * (v + float32_t3(0.0245786)) - float32_t3(0.000090537); - float32_t3 b = v * (v * float32_t(0.983729) + float32_t3(0.4329510)) + float32_t3(0.238081); + float32_t3 a = v * (v + float32_t3(0.0245786, 0.0245786, 0.0245786)) - float32_t3(0.000090537, 0.000090537, 0.000090537); + float32_t3 b = v * (v * float32_t3(0.983729, 0.983729, 0.983729) + float32_t3(0.4329510, 0.4329510, 0.4329510)) + float32_t3(0.238081, 0.238081, 0.238081); v = a / b; // ODT_SAT => XYZ @@ -67,6 +86,7 @@ float32_t3 aces(ACESParams params, float32_t3 rawCIEXYZcolor) // or get proper ACES RRT and ODTs // https://partnerhelp.netflixstudios.com/hc/en-us/articles/360000622487-I-m-using-ACES-Which-Output-Transform-should-I-use- +} } } From 305f7e7430077c72a9bbf0b814ed5a6bd9e691a6 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:35:49 +0530 Subject: [PATCH 11/36] Remove getGatheredLuma from luma_meter --- include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 21bd813439..94b898670b 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -13,6 +13,7 @@ #include "nbl/builtin/hlsl/type_traits.hlsl" #include "nbl/builtin/hlsl/math/morton.hlsl" #include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" +#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" #include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" namespace nbl @@ -57,7 +58,7 @@ struct geom_luma_meter { float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f)); float32_t2 samplePos = stride * sampleIndex; float32_t2 uvPos = (samplePos + float32_t2(0.5f, 0.5f)) / viewportSize; - float32_t3 color = colorspace::eotf::sRGB(tex.get(uvPos)); + float32_t3 color = colorspace::oetf::sRGB(tex.get(uvPos)); float32_t luma = dot(colorspace::sRGBtoXYZ[1], color); luma = clamp(luma, minLuma, maxLuma); @@ -102,16 +103,6 @@ struct geom_luma_meter { } } - float32_t getGatheredLuma( - NBL_REF_ARG(ValueAccessor) val, - uint32_t2 sampleCount - ) - { - uint32_t lumaSumBitPattern = val.get(glsl::gl_SubgroupInvocationID()); - float32_t lumaSumValue = float32_t(lumaSumBitPattern) / (log2(maxLuma) - log2(minLuma)) + log2(minLuma); - return glsl::subgroupAdd(lumaSumValue) / (sampleCount.x * sampleCount.y); - } - LumaMeteringWindow window; float32_t minLuma, maxLuma; }; From 3f4f6e93163e5c0c1a67f88b8906a07916ddbe84 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 18:28:48 +0530 Subject: [PATCH 12/36] Separate LumaMeteringWindow into a common header --- .../nbl/builtin/hlsl/luma_meter/common.hlsl | 27 +++++++++++++++++++ .../builtin/hlsl/luma_meter/luma_meter.hlsl | 12 +++------ src/nbl/builtin/CMakeLists.txt | 1 + 3 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 include/nbl/builtin/hlsl/luma_meter/common.hlsl diff --git a/include/nbl/builtin/hlsl/luma_meter/common.hlsl b/include/nbl/builtin/hlsl/luma_meter/common.hlsl new file mode 100644 index 0000000000..210039390e --- /dev/null +++ b/include/nbl/builtin/hlsl/luma_meter/common.hlsl @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace luma_meter +{ + +struct MeteringWindow +{ + float32_t2 meteringWindowScale; + float32_t2 meteringWindowOffset; +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 94b898670b..e865d61c0d 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -5,7 +5,6 @@ #ifndef _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ #define _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" #include "nbl/builtin/hlsl/workgroup/basic.hlsl" @@ -15,6 +14,7 @@ #include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" #include "nbl/builtin/hlsl/colorspace/OETF.hlsl" #include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/luma_meter/common.hlsl" namespace nbl { @@ -23,17 +23,11 @@ namespace hlsl namespace luma_meter { -struct LumaMeteringWindow -{ - float32_t2 meteringWindowScale; - float32_t2 meteringWindowOffset; -}; - template struct geom_luma_meter { using this_t = geom_luma_meter; - static this_t create(NBL_REF_ARG(LumaMeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum) + static this_t create(NBL_REF_ARG(MeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum) { this_t retval; retval.window = window; @@ -103,7 +97,7 @@ struct geom_luma_meter { } } - LumaMeteringWindow window; + MeteringWindow window; float32_t minLuma, maxLuma; }; } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index df61293d4a..b4346c428e 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -35,6 +35,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/__ref.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/__ptr.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/bda_accessor.hlsl") # luma metering +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/luma_meter/common.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/luma_meter/luma_meter.hlsl") # tonemapper LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tonemapper/operators.hlsl") From 515512a9dc5287dd68acce86205c53b5b219ba54 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 18:32:27 +0530 Subject: [PATCH 13/36] Simplify luma_meter naming --- include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index e865d61c0d..fb07acb8f4 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -24,8 +24,8 @@ namespace luma_meter { template -struct geom_luma_meter { - using this_t = geom_luma_meter; +struct geom_meter { + using this_t = geom_meter; static this_t create(NBL_REF_ARG(MeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum) { From 1919e53ed6ecb319f7892005d0faad86706288a2 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 19:06:03 +0530 Subject: [PATCH 14/36] Simplify morton code --- include/nbl/builtin/hlsl/math/morton.hlsl | 135 +--------------------- 1 file changed, 6 insertions(+), 129 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 1cd2105dc5..c0769fc88b 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -19,9 +19,8 @@ namespace hlsl namespace impl { -#ifdef __HLSL_VERSION template -T morton2d_mask(uint16_t _n) +NBL_CONSTEXPR_FUNC T morton2d_mask(uint16_t _n) { const static uint64_t mask[5] = { @@ -31,11 +30,11 @@ T morton2d_mask(uint16_t _n) 0x00FF00FF00FF00FFull, 0x0000FFFF0000FFFFull }; - return (T)mask[_n]; + return nbl::hlsl::_static_cast(mask[_n]); } template -T morton3d_mask(uint16_t _n) +NBL_CONSTEXPR_FUNC T morton3d_mask(uint16_t _n) { const static uint64_t mask[5] = { @@ -45,10 +44,10 @@ T morton3d_mask(uint16_t _n) 0x001F0000FF0000FFull, 0x001F00000000FFFFull }; - return (T)mask[_n]; + return nbl::hlsl::_static_cast(mask[_n]); } template -T morton4d_mask(uint16_t _n) +NBL_CONSTEXPR_FUNC T morton4d_mask(uint16_t _n) { const static uint64_t mask[4] = { @@ -57,7 +56,7 @@ T morton4d_mask(uint16_t _n) 0x000F000F000F000Full, 0x000000FF000000FFull }; - return (T)mask[_n]; + return nbl::hlsl::_static_cast(mask[_n]); } template @@ -141,128 +140,6 @@ inline T separate_bits_4d(T x) return x; } -#else -template -constexpr T morton2d_mask(uint8_t _n) -{ - constexpr uint64_t mask[5] = - { - 0x5555555555555555ull, - 0x3333333333333333ull, - 0x0F0F0F0F0F0F0F0Full, - 0x00FF00FF00FF00FFull, - 0x0000FFFF0000FFFFull - }; - return static_cast(mask[_n]); -} -template -constexpr T morton3d_mask(uint8_t _n) -{ - constexpr uint64_t mask[5] = - { - 0x1249249249249249ull, - 0x10C30C30C30C30C3ull, - 0x010F00F00F00F00Full, - 0x001F0000FF0000FFull, - 0x001F00000000FFFFull - }; - return static_cast(mask[_n]); -} -template -constexpr T morton4d_mask(uint8_t _n) -{ - constexpr uint64_t mask[4] = - { - 0x1111111111111111ull, - 0x0303030303030303ull, - 0x000F000F000F000Full, - 0x000000FF000000FFull - }; - return static_cast(mask[_n]); -} - -template -inline T morton2d_decode(T x) -{ - x = x & morton2d_mask(0); - x = (x | (x >> 1)) & morton2d_mask(1); - x = (x | (x >> 2)) & morton2d_mask(2); - if constexpr (bitDepth > 8u) - { - x = (x | (x >> 4)) & morton2d_mask(3); - } - if constexpr (bitDepth > 16u) - { - x = (x | (x >> 8)) & morton2d_mask(4); - } - if constexpr (bitDepth > 32u) - { - x = (x | (x >> 16)); - } - return x; -} - -//! Puts bits on even positions filling gaps with 0s -template -inline T separate_bits_2d(T x) -{ - if constexpr (bitDepth > 32u) - { - x = (x | (x << 16)) & morton2d_mask(4); - } - if constexpr (bitDepth > 16u) - { - x = (x | (x << 8)) & morton2d_mask(3); - } - if constexpr (bitDepth > 8u) - { - x = (x | (x << 4)) & morton2d_mask(2); - } - x = (x | (x << 2)) & morton2d_mask(1); - x = (x | (x << 1)) & morton2d_mask(0); - - return x; -} -template -inline T separate_bits_3d(T x) -{ - if constexpr (bitDepth > 32u) - { - x = (x | (x << 32)) & morton3d_mask(4); - } - if constexpr (bitDepth > 16u) - { - x = (x | (x << 16)) & morton3d_mask(3); - } - if constexpr (bitDepth > 8u) - { - x = (x | (x << 8)) & morton3d_mask(2); - } - x = (x | (x << 4)) & morton3d_mask(1); - x = (x | (x << 2)) & morton3d_mask(0); - - return x; -} -template -inline T separate_bits_4d(T x) -{ - if constexpr (bitDepth > 32u) - { - x = (x | (x << 24)) & morton4d_mask(3); - } - if constexpr (bitDepth > 16u) - { - x = (x | (x << 12)) & morton4d_mask(2); - } - if constexpr (bitDepth > 8u) - { - x = (x | (x << 6)) & morton4d_mask(1); - } - x = (x | (x << 3)) & morton4d_mask(0); - - return x; -} -#endif } template From 4c582382e8adca012b959577367138a8f1a92dfd Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 19:09:24 +0530 Subject: [PATCH 15/36] Add missing comment --- include/nbl/builtin/hlsl/tonemapper/operators.hlsl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl index daff652bbd..1481fd92b2 100644 --- a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -35,7 +35,8 @@ struct ACESParams static this_t create(float EV, float key = 0.18f, float Contrast = 1.f) { this_t retval; retval.gamma = Contrast; - retval.exposure = EV + log2(key * 0.77321666f); + const float reinhardMatchCorrection = 0.77321666f; // middle grays get exposed to different values between tonemappers given the same key + retval.exposure = EV + log2(key * reinhardMatchCorrection); return retval; } From 3c3f8b84025dfddb3464d4bc9ed5ca76f651b07c Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:09:02 +0530 Subject: [PATCH 16/36] Refactor tonemapping operators --- .../builtin/hlsl/tonemapper/operators.hlsl | 106 +++++++++--------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl index 1481fd92b2..854f78e302 100644 --- a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -6,6 +6,7 @@ #define _NBL_BUILTIN_HLSL_TONE_MAPPER_OPERATORS_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" namespace nbl { @@ -14,10 +15,13 @@ namespace hlsl namespace tonemapper { -struct ReinhardParams +template +struct Reinhard { - using this_t = ReinhardParams; - static this_t create(float EV, float key = 0.18f, float WhitePointRelToEV = 16.f) + using float_t = enable_if_t::value, T>; + using float_t3 = typename conditional, float32_t3, float16_t3>::type; + using this_t = Reinhard; + static this_t create(float_t EV, float_t key = 0.18f, float_t WhitePointRelToEV = 16.f) { this_t retval; retval.keyAndManualLinearExposure = key * exp2(EV); @@ -25,63 +29,65 @@ struct ReinhardParams return retval; } - float32_t keyAndManualLinearExposure; - float32_t rcpWhite2; + float_t3 operator()(float_t3 rawCIEXYZcolor) { + float_t exposureFactors = keyAndManualLinearExposure; + float_t exposedLuma = rawCIEXYZcolor.y * exposureFactors; + float_t colorMultiplier = (exposureFactors * (1.0 + exposedLuma * rcpWhite2) / (1.0 + exposedLuma)); + return rawCIEXYZcolor * colorMultiplier; + } + + float_t3 keyAndManualLinearExposure; + float_t3 rcpWhite2; }; -struct ACESParams +template +struct ACES { - using this_t = ACESParams; - static this_t create(float EV, float key = 0.18f, float Contrast = 1.f) { + using float_t = enable_if_t::value, T>; + using float_t3 = typename conditional, float32_t3, float16_t3>::type; + using float_t3x3 = typename conditional, float32_t3x3, float16_t3x3>::type; + + using this_t = ACES; + static this_t create(float_t EV, float_t key = 0.18f, float_t Contrast = 1.f) { this_t retval; retval.gamma = Contrast; - const float reinhardMatchCorrection = 0.77321666f; // middle grays get exposed to different values between tonemappers given the same key + const float_t reinhardMatchCorrection = 0.77321666f; // middle grays get exposed to different values between tonemappers given the same key retval.exposure = EV + log2(key * reinhardMatchCorrection); return retval; } - float32_t gamma; // 1.0 - float32_t exposure; // actualExposure+midGrayLog2 -}; - - -float32_t3 reinhard(ReinhardParams params, float32_t3 rawCIEXYZcolor) -{ - float32_t exposureFactors = params.keyAndManualLinearExposure; - float32_t exposedLuma = rawCIEXYZcolor.y * exposureFactors; - float32_t colorMultiplier = (exposureFactors * (1.0 + exposedLuma * params.rcpWhite2) / (1.0 + exposedLuma)); - return rawCIEXYZcolor * colorMultiplier; -} + float_t3 operator()(float_t3 rawCIEXYZcolor) { + float_t3 tonemapped = rawCIEXYZcolor; + if (tonemapped.y > 1.175494351e-38) + tonemapped *= exp2(log2(tonemapped.y) * (gamma - 1.0) + (exposure) * gamma); + + // XYZ => RRT_SAT + // this seems to be a matrix for some hybrid colorspace, coefficients are somewhere inbetween BT2020 and ACEScc(t) + const float_t3x3 XYZ_RRT_Input = float_t3x3( + float_t3(1.594168310, -0.262608051, -0.231993079), + float_t3(-0.6332771780, 1.5840380200, 0.0164147373), + float_t3(0.00892840419, 0.03648501260, 0.87711471300) + ); + + // this is obviously fitted to some particular simulated sensor/film and display + float_t3 v = mul(XYZ_RRT_Input, tonemapped); + float_t3 a = v * (v + float_t3(0.0245786, 0.0245786, 0.0245786)) - float_t3(0.000090537, 0.000090537, 0.000090537); + float_t3 b = v * (v * float_t3(0.983729, 0.983729, 0.983729) + float_t3(0.4329510, 0.4329510, 0.4329510)) + float_t3(0.238081, 0.238081, 0.238081); + v = a / b; + + // ODT_SAT => XYZ + // this seems to be a matrix for some hybrid colorspace, coefficients are similar to AdobeRGB,BT2020 and ACEScc(t) + const float_t3x3 ODT_XYZ_Output = float_t3x3( + float_t3(0.624798000, 0.164064825, 0.161605373), + float_t3(0.268048108, 0.674283803, 0.057667464), + float_t3(0.0157514643, 0.0526682511, 1.0204007600) + ); + return mul(ODT_XYZ_Output, v); + } -float32_t3 aces(ACESParams params, float32_t3 rawCIEXYZcolor) -{ - float32_t3 tonemapped = rawCIEXYZcolor; - if (tonemapped.y > 1.175494351e-38) - tonemapped *= exp2(log2(tonemapped.y) * (params.gamma - 1.0) + (params.exposure) * params.gamma); - - // XYZ => RRT_SAT - // this seems to be a matrix for some hybrid colorspace, coefficients are somewhere inbetween BT2020 and ACEScc(t) - const float32_t3x3 XYZ_RRT_Input = float32_t3x3( - float32_t3(1.594168310, -0.262608051, -0.231993079), - float32_t3(-0.6332771780, 1.5840380200, 0.0164147373), - float32_t3(0.00892840419, 0.03648501260, 0.87711471300) - ); - - // this is obviously fitted to some particular simulated sensor/film and display - float32_t3 v = mul(XYZ_RRT_Input, tonemapped); - float32_t3 a = v * (v + float32_t3(0.0245786, 0.0245786, 0.0245786)) - float32_t3(0.000090537, 0.000090537, 0.000090537); - float32_t3 b = v * (v * float32_t3(0.983729, 0.983729, 0.983729) + float32_t3(0.4329510, 0.4329510, 0.4329510)) + float32_t3(0.238081, 0.238081, 0.238081); - v = a / b; - - // ODT_SAT => XYZ - // this seems to be a matrix for some hybrid colorspace, coefficients are similar to AdobeRGB,BT2020 and ACEScc(t) - const float32_t3x3 ODT_XYZ_Output = float32_t3x3( - float32_t3(0.624798000, 0.164064825, 0.161605373), - float32_t3(0.268048108, 0.674283803, 0.057667464), - float32_t3(0.0157514643, 0.0526682511, 1.0204007600) - ); - return mul(ODT_XYZ_Output, v); -} + float_t gamma; // 1.0 + float_t exposure; // actualExposure+midGrayLog2 +}; // ideas for more operators https://web.archive.org/web/20191226154550/http://cs.columbia.edu/CAVE/software/softlib/dorf.php // or get proper ACES RRT and ODTs From b0e07505a374d3e81e18e9e71c39152e4599051c Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:17:38 +0530 Subject: [PATCH 17/36] Small fixes --- include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index fb07acb8f4..af128b0f98 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -27,7 +27,7 @@ template; - static this_t create(NBL_REF_ARG(MeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum) + static this_t create(NBL_CONST_REF_ARG(MeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum) { this_t retval; retval.window = window; @@ -68,11 +68,12 @@ struct geom_meter { float32_t2 viewportSize ) { + + uint32_t tid = workgroup::SubgroupContiguousIndex(); uint32_t2 coord = { - morton2d_decode_x(glsl::gl_LocalInvocationIndex()), - morton2d_decode_y(glsl::gl_LocalInvocationIndex()) + morton2d_decode_x(tid), + morton2d_decode_y(tid) }; - uint32_t tid = workgroup::SubgroupContiguousIndex(); uint32_t2 sampleIndex = coord * GroupSize + float32_t2(glsl::gl_SubgroupID() + 1, glsl::gl_SubgroupInvocationID() + 1); float32_t luma = 0.0f; @@ -81,8 +82,6 @@ struct geom_meter { luma = computeLuma(tex, sampleCount, sampleIndex, viewportSize); float32_t lumaSum = reduction(luma, sdata); - sdata.workgroupExecutionAndMemoryBarrier(); - if (tid == GroupSize - 1) { uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); From e8e46c9d042e76adb3bfd449982fcff70986cfba Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:20:32 +0530 Subject: [PATCH 18/36] Use promote to simplify code --- include/nbl/builtin/hlsl/tonemapper/operators.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl index 854f78e302..e5e6a9a97c 100644 --- a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -71,8 +71,8 @@ struct ACES // this is obviously fitted to some particular simulated sensor/film and display float_t3 v = mul(XYZ_RRT_Input, tonemapped); - float_t3 a = v * (v + float_t3(0.0245786, 0.0245786, 0.0245786)) - float_t3(0.000090537, 0.000090537, 0.000090537); - float_t3 b = v * (v * float_t3(0.983729, 0.983729, 0.983729) + float_t3(0.4329510, 0.4329510, 0.4329510)) + float_t3(0.238081, 0.238081, 0.238081); + float_t3 a = v * (v + promote(0.0245786)) - promote(0.000090537); + float_t3 b = v * (v * promote(0.983729) + promote(0.4329510)) + promote(0.238081); v = a / b; // ODT_SAT => XYZ From ee5affe6f20f25e1c7eb2675e07fe340be9204fb Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:07:34 +0530 Subject: [PATCH 19/36] Add static create to MeteringWindow --- include/nbl/builtin/hlsl/luma_meter/common.hlsl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/nbl/builtin/hlsl/luma_meter/common.hlsl b/include/nbl/builtin/hlsl/luma_meter/common.hlsl index 210039390e..55d1713619 100644 --- a/include/nbl/builtin/hlsl/luma_meter/common.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/common.hlsl @@ -16,8 +16,16 @@ namespace luma_meter struct MeteringWindow { + using this_t = MeteringWindow; float32_t2 meteringWindowScale; float32_t2 meteringWindowOffset; + + static this_t create(float32_t2 scale, float32_t2 offset) { + this_t retval; + retval.meteringWindowScale = scale; + retval.meteringWindowOffset = offset; + return retval; + } }; } From 56389f45a6f5689889d232fb051a15b0001e43f7 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:31:28 +0530 Subject: [PATCH 20/36] Infer sample count from viewportSize --- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 49 +++++++------------ .../builtin/hlsl/tonemapper/operators.hlsl | 4 +- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index af128b0f98..23deac8bbe 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -27,12 +27,10 @@ template; - static this_t create(NBL_CONST_REF_ARG(MeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum) + static this_t create(float32_t2 lumaMinMax) { this_t retval; - retval.window = window; - retval.minLuma = lumaMinimum; - retval.maxLuma = lumaMaximum; + retval.lumaMinMax = lumaMinMax; return retval; } @@ -43,61 +41,52 @@ struct geom_meter { } float32_t computeLuma( + NBL_CONST_REF_ARG(MeteringWindow) window, NBL_REF_ARG(TexAccessor) tex, - uint32_t2 sampleCount, - uint32_t2 sampleIndex, - float32_t2 viewportSize + float32_t2 shiftedCoord ) { - float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f)); - float32_t2 samplePos = stride * sampleIndex; - float32_t2 uvPos = (samplePos + float32_t2(0.5f, 0.5f)) / viewportSize; + float32_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; float32_t3 color = colorspace::oetf::sRGB(tex.get(uvPos)); float32_t luma = dot(colorspace::sRGBtoXYZ[1], color); - luma = clamp(luma, minLuma, maxLuma); + luma = clamp(luma, lumaMinMax.x, lumaMinMax.y); - return log2(luma / minLuma) / log2(maxLuma / minLuma); + return log2(luma / lumaMinMax.x) / log2(lumaMinMax.y / lumaMinMax.x); } void gatherLuma( + NBL_CONST_REF_ARG(MeteringWindow) window, NBL_REF_ARG(ValueAccessor) val, NBL_REF_ARG(TexAccessor) tex, NBL_REF_ARG(SharedAccessor) sdata, - uint32_t2 sampleCount, - float32_t2 viewportSize + float32_t2 tileOffset ) { - uint32_t tid = workgroup::SubgroupContiguousIndex(); uint32_t2 coord = { morton2d_decode_x(tid), morton2d_decode_y(tid) }; - uint32_t2 sampleIndex = coord * GroupSize + float32_t2(glsl::gl_SubgroupID() + 1, glsl::gl_SubgroupInvocationID() + 1); float32_t luma = 0.0f; + luma = computeLuma(window, tex, tileOffset + (float32_t2)(coord)); + float32_t lumaSum = reduction(luma, sdata); - if (sampleIndex.x <= sampleCount.x && sampleIndex.y <= sampleCount.y) { - luma = computeLuma(tex, sampleCount, sampleIndex, viewportSize); - float32_t lumaSum = reduction(luma, sdata); - - if (tid == GroupSize - 1) { - uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); - uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + if (tid == GroupSize - 1) { + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); - uint32_t lumaSumBitPattern = uint32_t(clamp((lumaSum - log2(minLuma)) * (log2(maxLuma) - log2(minLuma)), 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); + uint32_t lumaSumBitPattern = uint32_t(clamp((lumaSum - log2(lumaMinMax.x)) * (log2(lumaMinMax.y) - log2(lumaMinMax.x)), 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); - uint32_t3 workgroupSize = glsl::gl_WorkGroupSize(); - uint32_t workgroupIndex = dot(uint32_t3(workgroupSize.y * workgroupSize.z, workgroupSize.z, 1), glsl::gl_WorkGroupID()); + uint32_t3 workgroupSize = glsl::gl_WorkGroupSize(); + uint32_t workgroupIndex = dot(uint32_t3(workgroupSize.y * workgroupSize.z, workgroupSize.z, 1), glsl::gl_WorkGroupID()); - val.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); - } + val.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); } } - MeteringWindow window; - float32_t minLuma, maxLuma; + float32_t2 lumaMinMax; }; } } diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl index e5e6a9a97c..824e31d68a 100644 --- a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -36,8 +36,8 @@ struct Reinhard return rawCIEXYZcolor * colorMultiplier; } - float_t3 keyAndManualLinearExposure; - float_t3 rcpWhite2; + float_t keyAndManualLinearExposure; + float_t rcpWhite2; }; template From 23771d1610b50e2af60b2f4661d11c06e50d854f Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 22 Aug 2024 23:02:11 +0530 Subject: [PATCH 21/36] Rename gatherLuma, add toXYZ method and templatize the float type --- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 23deac8bbe..b0b19b3a82 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -11,9 +11,6 @@ #include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl" #include "nbl/builtin/hlsl/type_traits.hlsl" #include "nbl/builtin/hlsl/math/morton.hlsl" -#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" -#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" -#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" #include "nbl/builtin/hlsl/luma_meter/common.hlsl" namespace nbl @@ -25,42 +22,45 @@ namespace luma_meter template struct geom_meter { + using float_t = typename SharedAccessor::type; + using float_t2 = typename conditional, float32_t2, float16_t2>::type; + using float_t3 = typename conditional, float32_t3, float16_t3>::type; using this_t = geom_meter; - static this_t create(float32_t2 lumaMinMax) + static this_t create(float_t2 lumaMinMax) { this_t retval; retval.lumaMinMax = lumaMinMax; return retval; } - float32_t reduction(float32_t value, NBL_REF_ARG(SharedAccessor) sdata) + float_t reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata) { - return workgroup::reduction < plus < float32_t >, GroupSize >:: + return workgroup::reduction < plus < float_t >, GroupSize >:: template __call (value, sdata); } - float32_t computeLuma( + float_t computeLumaLog2( NBL_CONST_REF_ARG(MeteringWindow) window, NBL_REF_ARG(TexAccessor) tex, - float32_t2 shiftedCoord + float_t2 shiftedCoord ) { - float32_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; - float32_t3 color = colorspace::oetf::sRGB(tex.get(uvPos)); - float32_t luma = dot(colorspace::sRGBtoXYZ[1], color); + float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; + float_t3 color = tex.get(uvPos); + float_t luma = TexAccessor::toXYZ(color); luma = clamp(luma, lumaMinMax.x, lumaMinMax.y); - return log2(luma / lumaMinMax.x) / log2(lumaMinMax.y / lumaMinMax.x); + return max(log2(luma), log2(lumaMinMax.x)); } - void gatherLuma( + void sampleLuma( NBL_CONST_REF_ARG(MeteringWindow) window, NBL_REF_ARG(ValueAccessor) val, NBL_REF_ARG(TexAccessor) tex, NBL_REF_ARG(SharedAccessor) sdata, - float32_t2 tileOffset + float_t2 tileOffset ) { uint32_t tid = workgroup::SubgroupContiguousIndex(); @@ -69,9 +69,9 @@ struct geom_meter { morton2d_decode_y(tid) }; - float32_t luma = 0.0f; - luma = computeLuma(window, tex, tileOffset + (float32_t2)(coord)); - float32_t lumaSum = reduction(luma, sdata); + float_t luma = 0.0f; + luma = computeLumaLog2(window, tex, tileOffset + (float32_t2)(coord)); + float_t lumaSum = reduction(luma, sdata); if (tid == GroupSize - 1) { uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); @@ -86,7 +86,7 @@ struct geom_meter { } } - float32_t2 lumaMinMax; + float_t2 lumaMinMax; }; } } From ac390393cca2c89237532b57f12d95cc5584f0be Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 27 Aug 2024 00:41:14 +0530 Subject: [PATCH 22/36] Add uploadFloat, downloadFloat and gatherLuma --- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 63 ++++++++++++++++--- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index b0b19b3a82..c39b2e3ab6 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_arithmetic.hlsl" #include "nbl/builtin/hlsl/workgroup/basic.hlsl" #include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl" #include "nbl/builtin/hlsl/type_traits.hlsl" @@ -27,10 +28,11 @@ struct geom_meter { using float_t3 = typename conditional, float32_t3, float16_t3>::type; using this_t = geom_meter; - static this_t create(float_t2 lumaMinMax) + static this_t create(float_t2 lumaMinMax, float_t sampleCount) { this_t retval; retval.lumaMinMax = lumaMinMax; + retval.sampleCount = sampleCount; return retval; } @@ -55,6 +57,34 @@ struct geom_meter { return max(log2(luma), log2(lumaMinMax.x)); } + void uploadFloat( + NBL_REF_ARG(ValueAccessor) val_accessor, + uint32_t index, + float_t val, + float_t minLog2, + float_t rangeLog2 + ) + { + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + + uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); + + val_accessor.atomicAdd(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); + } + + float_t downloadFloat( + NBL_REF_ARG(ValueAccessor) val_accessor, + uint32_t index, + float_t minLog2, + float_t rangeLog2 + ) + { + float_t luma = (float_t)val.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1)); + luma = luma / rangeLog2 + minLog2; + return luma; + } + void sampleLuma( NBL_CONST_REF_ARG(MeteringWindow) window, NBL_REF_ARG(ValueAccessor) val, @@ -74,18 +104,37 @@ struct geom_meter { float_t lumaSum = reduction(luma, sdata); if (tid == GroupSize - 1) { - uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); - uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); - - uint32_t lumaSumBitPattern = uint32_t(clamp((lumaSum - log2(lumaMinMax.x)) * (log2(lumaMinMax.y) - log2(lumaMinMax.x)), 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); - uint32_t3 workgroupSize = glsl::gl_WorkGroupSize(); uint32_t workgroupIndex = dot(uint32_t3(workgroupSize.y * workgroupSize.z, workgroupSize.z, 1), glsl::gl_WorkGroupID()); - val.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); + uploadFloat( + val, + workgroupIndex, + lumaSum, + log2(lumaMinMax.x), + log2(lumaMinMax.y / lumaMinMax.x) + ); } } + void gatherLuma( + NBL_REF_ARG(ValueAccessor) val + ) + { + uint32_t tid = workgroup::SubgroupContiguousIndex(); + float_t lumaSum = glsl::subgroupAdd( + downloadFloat( + val, + tid, + log2(lumaMinMax.x), + log2(lumaMinMax.y / lumaMinMax.x) + ) + ); + + uploadFloat(val, 0, lumaSum, log2(lumaMinMax.x), log2(lumaMinMax.y / lumaMinMax.x)); + } + + float_t sampleCount; float_t2 lumaMinMax; }; } From 49a80499c4ee3c7b09ce20e1f7a995d63cc7a73d Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 27 Aug 2024 19:37:11 +0530 Subject: [PATCH 23/36] Normalize tileOffset and coord to uv before computing Luma --- .../nbl/builtin/hlsl/luma_meter/luma_meter.hlsl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index c39b2e3ab6..6804c1d631 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -50,7 +50,7 @@ struct geom_meter { { float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; float_t3 color = tex.get(uvPos); - float_t luma = TexAccessor::toXYZ(color); + float_t luma = (float_t)TexAccessor::toXYZ(color); luma = clamp(luma, lumaMinMax.x, lumaMinMax.y); @@ -80,7 +80,7 @@ struct geom_meter { float_t rangeLog2 ) { - float_t luma = (float_t)val.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1)); + float_t luma = (float_t)val_accessor.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1)); luma = luma / rangeLog2 + minLog2; return luma; } @@ -90,7 +90,8 @@ struct geom_meter { NBL_REF_ARG(ValueAccessor) val, NBL_REF_ARG(TexAccessor) tex, NBL_REF_ARG(SharedAccessor) sdata, - float_t2 tileOffset + float_t2 tileOffset, + float_t2 viewportSize ) { uint32_t tid = workgroup::SubgroupContiguousIndex(); @@ -100,7 +101,8 @@ struct geom_meter { }; float_t luma = 0.0f; - luma = computeLumaLog2(window, tex, tileOffset + (float32_t2)(coord)); + float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize; + luma = computeLumaLog2(window, tex, shiftedCoord); float_t lumaSum = reduction(luma, sdata); if (tid == GroupSize - 1) { @@ -117,7 +119,7 @@ struct geom_meter { } } - void gatherLuma( + float_t gatherLuma( NBL_REF_ARG(ValueAccessor) val ) { @@ -131,7 +133,7 @@ struct geom_meter { ) ); - uploadFloat(val, 0, lumaSum, log2(lumaMinMax.x), log2(lumaMinMax.y / lumaMinMax.x)); + return lumaSum; } float_t sampleCount; From 8a10ae2e12f36d48f39ff3350920d800da1cc47e Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sun, 29 Sep 2024 18:16:56 +0100 Subject: [PATCH 24/36] Simplify return statement --- include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 6804c1d631..266d6e6a2a 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -81,8 +81,7 @@ struct geom_meter { ) { float_t luma = (float_t)val_accessor.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1)); - luma = luma / rangeLog2 + minLog2; - return luma; + return luma / rangeLog2 + minLog2; } void sampleLuma( From 6b01b6ddd4e687684e6e7a5f8073f7e556ad6967 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 11 Dec 2024 00:26:02 +0000 Subject: [PATCH 25/36] Update submodule pointers --- 3rdparty/dxc/dxc | 2 +- 3rdparty/libexpat | 2 +- 3rdparty/nbl_spirv_cross | 2 +- 3rdparty/openexr | 2 +- 3rdparty/volk | 2 +- examples_tests | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index a08b6cbeb1..29a5e1258e 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit a08b6cbeb1038d14d0586d10a8cfa507b2fda8eb +Subproject commit 29a5e1258e2f01dd15ef1f58e24a02337c96c8f7 diff --git a/3rdparty/libexpat b/3rdparty/libexpat index e2004f9195..39e487da35 160000 --- a/3rdparty/libexpat +++ b/3rdparty/libexpat @@ -1 +1 @@ -Subproject commit e2004f9195700bb8248c8c954578f14fda58be27 +Subproject commit 39e487da353b20bb3a724311d179ba0fddffc65b diff --git a/3rdparty/nbl_spirv_cross b/3rdparty/nbl_spirv_cross index f4accc2a4b..b52e6a55ca 160000 --- a/3rdparty/nbl_spirv_cross +++ b/3rdparty/nbl_spirv_cross @@ -1 +1 @@ -Subproject commit f4accc2a4b478c42038c920aa0e43a8aab7d135c +Subproject commit b52e6a55ca2d9805a18dccfc45c7a2e692c1d8e1 diff --git a/3rdparty/openexr b/3rdparty/openexr index fca936a964..824ed557b3 160000 --- a/3rdparty/openexr +++ b/3rdparty/openexr @@ -1 +1 @@ -Subproject commit fca936a964da5983daecdbed7cd249934701b41a +Subproject commit 824ed557b3c59288a685356c708e5806b1122fe1 diff --git a/3rdparty/volk b/3rdparty/volk index b6be5ba0af..efb96f9031 160000 --- a/3rdparty/volk +++ b/3rdparty/volk @@ -1 +1 @@ -Subproject commit b6be5ba0af5567974cc8a0261471573418f0f34f +Subproject commit efb96f90317e1c902d6b45ae95d14e67779a2241 diff --git a/examples_tests b/examples_tests index 8b6675b3ba..36633f5c2c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8b6675b3ba9fe1ca00f2c6573a4888abb8477da7 +Subproject commit 36633f5c2cae3e8e870a837c86e71f3a50061a3e From f95f1c1e7eb5fe5c930b1c0badba345f4e27033e Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 11 Dec 2024 00:54:41 +0000 Subject: [PATCH 26/36] Update submodule pointer --- 3rdparty/imgui | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/imgui b/3rdparty/imgui index e489e40a85..a29e9dba30 160000 --- a/3rdparty/imgui +++ b/3rdparty/imgui @@ -1 +1 @@ -Subproject commit e489e40a853426767de9ce0637bc0c9ceb431c1e +Subproject commit a29e9dba3012eca9f80bdc4c39ca61a1df8e7175 From 1a5827379821023273130a547b8ba50141cd85a9 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 13 Dec 2024 04:34:45 +0000 Subject: [PATCH 27/36] Update submodule pointer --- 3rdparty/Vulkan-Headers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 2c823b7f27..31aa7f634b 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 2c823b7f27590ec0a489f7fbe14b154e13fa5cfb +Subproject commit 31aa7f634b052d87ede4664053e85f3f4d1d50d3 From b6e1f57110c4e34715bd6c15223a1db9224c47ff Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 13 Dec 2024 04:46:17 +0000 Subject: [PATCH 28/36] Update submodule pointer --- 3rdparty/volk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/volk b/3rdparty/volk index efb96f9031..b6be5ba0af 160000 --- a/3rdparty/volk +++ b/3rdparty/volk @@ -1 +1 @@ -Subproject commit efb96f90317e1c902d6b45ae95d14e67779a2241 +Subproject commit b6be5ba0af5567974cc8a0261471573418f0f34f From 5239c29945cd2f609d13f40c66af3dcc4bd2f6a2 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 14 Jan 2025 00:42:26 +0000 Subject: [PATCH 29/36] Update submodule pointer --- 3rdparty/Vulkan-Headers | 2 +- 3rdparty/dxc/dxc | 2 +- 3rdparty/imgui | 2 +- 3rdparty/libexpat | 2 +- 3rdparty/nbl_spirv_cross | 2 +- 3rdparty/openexr | 2 +- 3rdparty/parallel-hashmap | 2 +- examples_tests | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 31aa7f634b..2c823b7f27 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 31aa7f634b052d87ede4664053e85f3f4d1d50d3 +Subproject commit 2c823b7f27590ec0a489f7fbe14b154e13fa5cfb diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index 5adc27f9e4..a08b6cbeb1 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit 5adc27f9e42de7681d65a98873048af661b9b367 +Subproject commit a08b6cbeb1038d14d0586d10a8cfa507b2fda8eb diff --git a/3rdparty/imgui b/3rdparty/imgui index a29e9dba30..e489e40a85 160000 --- a/3rdparty/imgui +++ b/3rdparty/imgui @@ -1 +1 @@ -Subproject commit a29e9dba3012eca9f80bdc4c39ca61a1df8e7175 +Subproject commit e489e40a853426767de9ce0637bc0c9ceb431c1e diff --git a/3rdparty/libexpat b/3rdparty/libexpat index 39e487da35..e2004f9195 160000 --- a/3rdparty/libexpat +++ b/3rdparty/libexpat @@ -1 +1 @@ -Subproject commit 39e487da353b20bb3a724311d179ba0fddffc65b +Subproject commit e2004f9195700bb8248c8c954578f14fda58be27 diff --git a/3rdparty/nbl_spirv_cross b/3rdparty/nbl_spirv_cross index b52e6a55ca..f4accc2a4b 160000 --- a/3rdparty/nbl_spirv_cross +++ b/3rdparty/nbl_spirv_cross @@ -1 +1 @@ -Subproject commit b52e6a55ca2d9805a18dccfc45c7a2e692c1d8e1 +Subproject commit f4accc2a4b478c42038c920aa0e43a8aab7d135c diff --git a/3rdparty/openexr b/3rdparty/openexr index c8a74d9ac9..fca936a964 160000 --- a/3rdparty/openexr +++ b/3rdparty/openexr @@ -1 +1 @@ -Subproject commit c8a74d9ac97dd579a47a7913f361a87349c0fffd +Subproject commit fca936a964da5983daecdbed7cd249934701b41a diff --git a/3rdparty/parallel-hashmap b/3rdparty/parallel-hashmap index 7684faf186..fd7b8fb87d 160000 --- a/3rdparty/parallel-hashmap +++ b/3rdparty/parallel-hashmap @@ -1 +1 @@ -Subproject commit 7684faf186806e2c88554a78188c18185b21f127 +Subproject commit fd7b8fb87d74cc990591c3443b2ef21e9e137500 diff --git a/examples_tests b/examples_tests index 36633f5c2c..f79caed8b5 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 36633f5c2cae3e8e870a837c86e71f3a50061a3e +Subproject commit f79caed8b54499c1a4e848672dec38ce85d9a184 From 06c915e42162869f11ae951b7a081c722505d4e8 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 21 Jan 2025 16:11:27 +0100 Subject: [PATCH 30/36] stop rolling back my modules! --- 3rdparty/Vulkan-Headers | 2 +- 3rdparty/imgui | 2 +- 3rdparty/imguizmo | 2 +- 3rdparty/openexr | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 2c823b7f27..31aa7f634b 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 2c823b7f27590ec0a489f7fbe14b154e13fa5cfb +Subproject commit 31aa7f634b052d87ede4664053e85f3f4d1d50d3 diff --git a/3rdparty/imgui b/3rdparty/imgui index e489e40a85..a29e9dba30 160000 --- a/3rdparty/imgui +++ b/3rdparty/imgui @@ -1 +1 @@ -Subproject commit e489e40a853426767de9ce0637bc0c9ceb431c1e +Subproject commit a29e9dba3012eca9f80bdc4c39ca61a1df8e7175 diff --git a/3rdparty/imguizmo b/3rdparty/imguizmo index 6f4b2197ef..b10e91756d 160000 --- a/3rdparty/imguizmo +++ b/3rdparty/imguizmo @@ -1 +1 @@ -Subproject commit 6f4b2197efd715d16b19775b00f36c6c6f5aacb6 +Subproject commit b10e91756d32395f5c1fefd417899b657ed7cb88 diff --git a/3rdparty/openexr b/3rdparty/openexr index fca936a964..c8a74d9ac9 160000 --- a/3rdparty/openexr +++ b/3rdparty/openexr @@ -1 +1 @@ -Subproject commit fca936a964da5983daecdbed7cd249934701b41a +Subproject commit c8a74d9ac97dd579a47a7913f361a87349c0fffd From 90d20c44783c9f3837f554ae8a05beb1ecd9f956 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 21 Jan 2025 16:49:29 +0100 Subject: [PATCH 31/36] point submodule at head --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f79caed8b5..9e26a74aa1 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f79caed8b54499c1a4e848672dec38ce85d9a184 +Subproject commit 9e26a74aa1bcbe5e26ee14a79d4f2ef9e2701e0d From 4edd38c002531e3bbf55a8f0649af187223a1077 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:57:14 +0000 Subject: [PATCH 32/36] Add capabilities for atomic ops --- include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 2ecb08cdb2..973a313e9c 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -61,37 +61,45 @@ pointer_t copyObject([[vk::ext_reference]] T v); // Here's the thing with atomics, it's not only the data type that dictates whether you can do an atomic or not. // It's the storage class that has the most effect (shared vs storage vs image) and we can't check that easily template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t || is_same_v, T> atomicIAdd([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t && (is_same_v || is_same_v), T> atomicIAdd(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t || is_same_v, T> atomicIAdd([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t && (is_same_v || is_same_v), T> atomicIAdd(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t || is_same_v, T> atomicISub([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t && (is_same_v || is_same_v), T> atomicISub(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t || is_same_v, T> atomicISub([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t && (is_same_v || is_same_v), T> atomicISub(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); From f1e3e9866682fc79fa830d4a1c888674e24f58f7 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:58:01 +0000 Subject: [PATCH 33/36] Fix luma_meter --- include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 266d6e6a2a..9808b9e26d 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -105,8 +105,8 @@ struct geom_meter { float_t lumaSum = reduction(luma, sdata); if (tid == GroupSize - 1) { - uint32_t3 workgroupSize = glsl::gl_WorkGroupSize(); - uint32_t workgroupIndex = dot(uint32_t3(workgroupSize.y * workgroupSize.z, workgroupSize.z, 1), glsl::gl_WorkGroupID()); + uint32_t3 workgroupCount = glsl::gl_NumWorkGroups(); + uint32_t workgroupIndex = (workgroupCount.x * workgroupCount.y * workgroupCount.z) / 64; uploadFloat( val, @@ -122,8 +122,8 @@ struct geom_meter { NBL_REF_ARG(ValueAccessor) val ) { - uint32_t tid = workgroup::SubgroupContiguousIndex(); - float_t lumaSum = glsl::subgroupAdd( + uint32_t tid = glsl::gl_SubgroupInvocationID(); + float_t luma = glsl::subgroupAdd( downloadFloat( val, tid, @@ -132,7 +132,10 @@ struct geom_meter { ) ); - return lumaSum; + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + + return (luma / (1 << fixedPointBitsLeft)) / sampleCount; } float_t sampleCount; From f1b7d170718d1ba0d48eef0b69af842be0463bea Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sun, 16 Mar 2025 11:07:47 +0000 Subject: [PATCH 34/36] Add median_luma_meter --- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 145 ++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index 9808b9e26d..c17a64c437 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -141,6 +141,151 @@ struct geom_meter { float_t sampleCount; float_t2 lumaMinMax; }; + +template +struct median_meter { + using int_t = typename SharedAccessor::type; + using float_t = float32_t; + using float_t2 = typename conditional, float32_t2, float16_t2>::type; + using float_t3 = typename conditional, float32_t3, float16_t3>::type; + using this_t = median_meter; + + static this_t create(float_t2 lumaMinMax, float_t sampleCount) { + this_t retval; + retval.lumaMinMax = lumaMinMax; + retval.sampleCount = sampleCount; + return retval; + } + + int_t inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata) { + return workgroup::inclusive_scan < plus < int_t >, GroupSize >:: + template __call (value, sdata); + } + + float_t computeLuma( + NBL_CONST_REF_ARG(MeteringWindow) window, + NBL_REF_ARG(TexAccessor) tex, + float_t2 shiftedCoord + ) { + float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; + float_t3 color = tex.get(uvPos); + float_t luma = (float_t)TexAccessor::toXYZ(color); + + return clamp(luma, lumaMinMax.x, lumaMinMax.y); + } + + int_t float2Int( + float_t val, + float_t minLog2, + float_t rangeLog2 + ) { + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + + return int_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); + } + + float_t int2Float( + int_t val, + float_t minLog2, + float_t rangeLog2 + ) { + return val / rangeLog2 + minLog2; + } + + void sampleLuma( + NBL_CONST_REF_ARG(MeteringWindow) window, + NBL_REF_ARG(HistogramAccessor) histo, + NBL_REF_ARG(TexAccessor) tex, + NBL_REF_ARG(SharedAccessor) sdata, + float_t2 tileOffset, + float_t2 viewportSize + ) { + uint32_t tid = workgroup::SubgroupContiguousIndex(); + + for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) { + sdata.set(vid, 0); + } + + sdata.workgroupExecutionAndMemoryBarrier(); + + uint32_t2 coord = { + morton2d_decode_x(tid), + morton2d_decode_y(tid) + }; + + float_t luma = 0.0f; + float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize; + luma = computeLuma(window, tex, shiftedCoord); + + float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount; + uint32_t binIndex = (uint32_t)((luma - lumaMinMax.x) / binSize); + + sdata.atomicAdd(binIndex, float2Int(luma, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); + + sdata.workgroupExecutionAndMemoryBarrier(); + + float_t histogram_value; + sdata.get(tid, histogram_value); + + sdata.workgroupExecutionAndMemoryBarrier(); + + float_t sum = inclusive_scan(histogram_value, sdata); + histo.atomicAdd(tid, float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); + + const bool is_last_wg_invocation = tid == (GroupSize - 1); + const static uint32_t RoundedBinCount = 1 + (BinCount - 1) / GroupSize; + + for (int i = 1; i < RoundedBinCount; i++) { + uint32_t keyBucketStart = GroupSize * i; + uint32_t vid = tid + keyBucketStart; + + // no if statement about the last iteration needed + if (is_last_wg_invocation) { + float_t beforeSum; + sdata.get(keyBucketStart, beforeSum); + sdata.set(keyBucketStart, beforeSum + sum); + } + + // propagate last block tail to next block head and protect against subsequent scans stepping on each other's toes + sdata.workgroupExecutionAndMemoryBarrier(); + + // no aliasing anymore + float_t atVid; + sdata.get(vid, atVid); + sum = inclusive_scan(atVid, sdata); + if (vid < BinCount) { + histo.atomicAdd(vid, float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); + } + } + } + + float_t gatherLuma( + NBL_REF_ARG(HistogramAccessor) histo, + NBL_REF_ARG(SharedAccessor) sdata + ) { + uint32_t tid = workgroup::SubgroupContiguousIndex(); + + for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) { + sdata.set( + vid, + histo.get(vid & (BinCount - 1)) + ); + } + + sdata.workgroupExecutionAndMemoryBarrier(); + + uint32_t percentile40, percentile60; + sdata.get(BinCount * 0.4, percentile40); + sdata.get(BinCount * 0.6, percentile60); + + return (int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2; + } + + float_t sampleCount; + float_t2 lumaMinMax; +}; + } } } From 83ac633896008509ea16f8d896e4048f98eb888d Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sun, 16 Mar 2025 11:49:58 +0000 Subject: [PATCH 35/36] Update submodule pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 06dad8c118..498ffd21a0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 06dad8c118027d6ebc8ee04e19340ba643079a63 +Subproject commit 498ffd21a06b9e9c74d20b37860421d17fe7cf49 From 2b5e502d23c14b8cba96cb8a7ff7a4b6d4d5b4e3 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Mon, 17 Mar 2025 16:11:48 +0000 Subject: [PATCH 36/36] Make changes to luma_meter --- .../builtin/hlsl/luma_meter/luma_meter.hlsl | 48 ++++++++----------- .../builtin/hlsl/tonemapper/operators.hlsl | 20 +++++--- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl index c17a64c437..20af804603 100644 --- a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -36,13 +36,13 @@ struct geom_meter { return retval; } - float_t reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata) + float_t __reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata) { return workgroup::reduction < plus < float_t >, GroupSize >:: template __call (value, sdata); } - float_t computeLumaLog2( + float_t __computeLumaLog2( NBL_CONST_REF_ARG(MeteringWindow) window, NBL_REF_ARG(TexAccessor) tex, float_t2 shiftedCoord @@ -54,26 +54,26 @@ struct geom_meter { luma = clamp(luma, lumaMinMax.x, lumaMinMax.y); - return max(log2(luma), log2(lumaMinMax.x)); + return log2(luma); } - void uploadFloat( + void __uploadFloat( NBL_REF_ARG(ValueAccessor) val_accessor, - uint32_t index, float_t val, float_t minLog2, float_t rangeLog2 ) { uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t workgroupIndex = (workGroupCount.x * workGroupCount.y * workGroupCount.z) / 64; uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); - val_accessor.atomicAdd(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); + val_accessor.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); } - float_t downloadFloat( + float_t __downloadFloat( NBL_REF_ARG(ValueAccessor) val_accessor, uint32_t index, float_t minLog2, @@ -101,17 +101,13 @@ struct geom_meter { float_t luma = 0.0f; float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize; - luma = computeLumaLog2(window, tex, shiftedCoord); - float_t lumaSum = reduction(luma, sdata); - - if (tid == GroupSize - 1) { - uint32_t3 workgroupCount = glsl::gl_NumWorkGroups(); - uint32_t workgroupIndex = (workgroupCount.x * workgroupCount.y * workgroupCount.z) / 64; + float_t lumaLog2 = __computeLumaLog2(window, tex, shiftedCoord); + float_t lumaLog2Sum = __reduction(lumaLog2, sdata); - uploadFloat( + if (tid == 0) { + __uploadFloat( val, - workgroupIndex, - lumaSum, + lumaLog2Sum, log2(lumaMinMax.x), log2(lumaMinMax.y / lumaMinMax.x) ); @@ -124,7 +120,7 @@ struct geom_meter { { uint32_t tid = glsl::gl_SubgroupInvocationID(); float_t luma = glsl::subgroupAdd( - downloadFloat( + __downloadFloat( val, tid, log2(lumaMinMax.x), @@ -150,19 +146,18 @@ struct median_meter { using float_t3 = typename conditional, float32_t3, float16_t3>::type; using this_t = median_meter; - static this_t create(float_t2 lumaMinMax, float_t sampleCount) { + static this_t create(float_t2 lumaMinMax) { this_t retval; retval.lumaMinMax = lumaMinMax; - retval.sampleCount = sampleCount; return retval; } - int_t inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata) { + int_t __inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata) { return workgroup::inclusive_scan < plus < int_t >, GroupSize >:: template __call (value, sdata); } - float_t computeLuma( + float_t __computeLuma( NBL_CONST_REF_ARG(MeteringWindow) window, NBL_REF_ARG(TexAccessor) tex, float_t2 shiftedCoord @@ -174,7 +169,7 @@ struct median_meter { return clamp(luma, lumaMinMax.x, lumaMinMax.y); } - int_t float2Int( + int_t __float2Int( float_t val, float_t minLog2, float_t rangeLog2 @@ -185,7 +180,7 @@ struct median_meter { return int_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); } - float_t int2Float( + float_t __int2Float( int_t val, float_t minLog2, float_t rangeLog2 @@ -216,7 +211,7 @@ struct median_meter { float_t luma = 0.0f; float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize; - luma = computeLuma(window, tex, shiftedCoord); + luma = __computeLuma(window, tex, shiftedCoord); float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount; uint32_t binIndex = (uint32_t)((luma - lumaMinMax.x) / binSize); @@ -255,7 +250,7 @@ struct median_meter { sdata.get(vid, atVid); sum = inclusive_scan(atVid, sdata); if (vid < BinCount) { - histo.atomicAdd(vid, float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); + histo.atomicAdd(vid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); } } } @@ -279,10 +274,9 @@ struct median_meter { sdata.get(BinCount * 0.4, percentile40); sdata.get(BinCount * 0.6, percentile60); - return (int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2; + return (__int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + __int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2; } - float_t sampleCount; float_t2 lumaMinMax; }; diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl index 824e31d68a..46d241c76c 100644 --- a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -19,20 +19,25 @@ template struct Reinhard { using float_t = enable_if_t::value, T>; - using float_t3 = typename conditional, float32_t3, float16_t3>::type; + using float_t3 = vector; using this_t = Reinhard; + static this_t create(float_t EV, float_t key = 0.18f, float_t WhitePointRelToEV = 16.f) { this_t retval; + + const float_t unit = 1.0; retval.keyAndManualLinearExposure = key * exp2(EV); - retval.rcpWhite2 = 1.f / (WhitePointRelToEV * WhitePointRelToEV); + retval.rcpWhite2 = unit / (WhitePointRelToEV * WhitePointRelToEV); + return retval; } float_t3 operator()(float_t3 rawCIEXYZcolor) { + const float_t unit = 1.0; float_t exposureFactors = keyAndManualLinearExposure; float_t exposedLuma = rawCIEXYZcolor.y * exposureFactors; - float_t colorMultiplier = (exposureFactors * (1.0 + exposedLuma * rcpWhite2) / (1.0 + exposedLuma)); + float_t colorMultiplier = (exposureFactors * (unit + exposedLuma * rcpWhite2) / (unit + exposedLuma)); return rawCIEXYZcolor * colorMultiplier; } @@ -44,8 +49,8 @@ template struct ACES { using float_t = enable_if_t::value, T>; - using float_t3 = typename conditional, float32_t3, float16_t3>::type; - using float_t3x3 = typename conditional, float32_t3x3, float16_t3x3>::type; + using float_t3 = vector; + using float_t3x3 = matrix; using this_t = ACES; static this_t create(float_t EV, float_t key = 0.18f, float_t Contrast = 1.f) { @@ -57,9 +62,10 @@ struct ACES } float_t3 operator()(float_t3 rawCIEXYZcolor) { + const float_t unit = 1.0; float_t3 tonemapped = rawCIEXYZcolor; - if (tonemapped.y > 1.175494351e-38) - tonemapped *= exp2(log2(tonemapped.y) * (gamma - 1.0) + (exposure) * gamma); + if (tonemapped.y > bit_cast(numeric_limits::min)) + tonemapped *= exp2(log2(tonemapped.y) * (gamma - unit) + (exposure) * gamma); // XYZ => RRT_SAT // this seems to be a matrix for some hybrid colorspace, coefficients are somewhere inbetween BT2020 and ACEScc(t)