diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index 00b04dd249..fdb41ed298 100644 --- a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -82,7 +82,7 @@ class IAsset : virtual public core::IReferenceCounted ET_SKELETON = 1ull<<7, //!< asset::ICPUSkeleton ET_ANIMATION_LIBRARY = 1ull<<8, //!< asset::ICPUAnimationLibrary ET_PIPELINE_LAYOUT = 1ull<<9, //!< asset::ICPUPipelineLayout - ET_SHADER = 1ull<<10, //!< asset::ICPUShader + ET_SHADER = 1ull<<10, //!< asset::IShader ET_RENDERPASS_INDEPENDENT_PIPELINE = 1ull<<12, //!< asset::ICPURenderpassIndependentPipeline ET_RENDERPASS = 1ull<<13, //!< asset::ICPURenderpass ET_FRAMEBUFFER = 1ull<<14, //!< asset::ICPUFramebuffer diff --git a/include/nbl/asset/ICPUComputePipeline.h b/include/nbl/asset/ICPUComputePipeline.h index 14b0277152..b9b707d9fc 100644 --- a/include/nbl/asset/ICPUComputePipeline.h +++ b/include/nbl/asset/ICPUComputePipeline.h @@ -19,7 +19,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> public: struct SCreationParams final : IPipeline::SCreationParams { - ICPUShader::SSpecInfo shader; + SShaderSpecInfo shader; }; static core::smart_refctd_ptr create(const SCreationParams& params) { @@ -41,8 +41,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> inline size_t getDependantCount() const override {return 2;} // provide default arg - inline IShader::SSpecInfo getSpecInfo() {return base_t::getSpecInfo(ICPUShader::E_SHADER_STAGE::ESS_COMPUTE);} - inline IShader::SSpecInfo getSpecInfo() const {return base_t::getSpecInfo(ICPUShader::E_SHADER_STAGE::ESS_COMPUTE);} + inline IPipelineBase::SShaderSpecInfo getSpecInfo() const {return base_t::getSpecInfo(hlsl::ShaderStage::ESS_COMPUTE);} protected: using base_t::base_t; @@ -60,9 +59,9 @@ class ICPUComputePipeline : public ICPUPipeline,1> return const_cast(m_layout.get()); } - inline int8_t stageToIndex(const ICPUShader::E_SHADER_STAGE stage) const override + inline int8_t stageToIndex(const hlsl::ShaderStage stage) const 
override { - return stage!=ICPUShader::E_SHADER_STAGE::ESS_COMPUTE ? (-1):0; + return stage!=hlsl::ShaderStage::ESS_COMPUTE ? (-1):0; } }; diff --git a/include/nbl/asset/ICPUGraphicsPipeline.h b/include/nbl/asset/ICPUGraphicsPipeline.h index e319b27503..2643db7550 100644 --- a/include/nbl/asset/ICPUGraphicsPipeline.h +++ b/include/nbl/asset/ICPUGraphicsPipeline.h @@ -13,9 +13,9 @@ namespace nbl::asset { -class ICPUGraphicsPipeline final : public ICPUPipeline,5u> +class ICPUGraphicsPipeline final : public ICPUPipeline,5u> { - using pipeline_base_t = IGraphicsPipeline; + using pipeline_base_t = IGraphicsPipeline; using base_t = ICPUPipeline; public: @@ -32,7 +32,7 @@ class ICPUGraphicsPipeline final : public ICPUPipeline create(const SCreationParams& params) { // we'll validate the specialization info later when attempting to set it - if (!params.impl_valid([](const ICPUShader::SSpecInfo& info)->bool{return true;})) + if (!params.impl_valid([](const IPipelineBase::SShaderSpecInfo& info)->bool{return true;})) return nullptr; auto retval = new ICPUGraphicsPipeline(params); for (const auto spec : params.shaders) @@ -67,7 +67,7 @@ class ICPUGraphicsPipeline final : public ICPUPipeline&& layout) const override { - std::array _shaders; + std::array _shaders; for (auto i=0; i=GRAPHICS_SHADER_STAGE_COUNT || hlsl::bitCount(stage)!=1) diff --git a/include/nbl/asset/ICPUPipeline.h b/include/nbl/asset/ICPUPipeline.h index 5c43df0170..d1693f18eb 100644 --- a/include/nbl/asset/ICPUPipeline.h +++ b/include/nbl/asset/ICPUPipeline.h @@ -8,7 +8,6 @@ #include "nbl/asset/IAsset.h" #include "nbl/asset/IPipeline.h" #include "nbl/asset/ICPUPipelineLayout.h" -#include "nbl/asset/ICPUShader.h" namespace nbl::asset @@ -34,10 +33,10 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase if (shader) { auto stageInfo = m_stages[i].info; - core::smart_refctd_ptr newShader; + core::smart_refctd_ptr newShader; if (_depth>0u) { - newShader = 
core::smart_refctd_ptr_static_cast(shader->clone(_depth-1u)); + newShader = core::smart_refctd_ptr_static_cast(shader->clone(_depth-1u)); stageInfo.shader = newShader.get(); } cp->setSpecInfo(stageInfo); @@ -61,40 +60,50 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase PipelineNonAssetBase::m_layout = std::move(_layout); } - // The getters are weird because the shader pointer needs patching - inline IShader::SSpecInfo getSpecInfo(const ICPUShader::E_SHADER_STAGE stage) + // The getters are weird because the shader pointer, spec constant map and entry point needs patching + inline IShader* getShader(const hlsl::ShaderStage stage) + { + assert(isMutable()); + return const_cast(getSpecInfo(stage).shader); + } + inline std::string* getEntryPoint(const hlsl::ShaderStage stage) { - assert(isMutable()); const auto stageIx = stageToIndex(stage); if (stageIx<0) return {}; - return m_stages[stageIx].info; + return &m_stages[stageIx].entryPoint; } - inline IShader::SSpecInfo getSpecInfo(const ICPUShader::E_SHADER_STAGE stage) const + inline IPipelineBase::SShaderSpecInfo::spec_constant_map_t* getSpecConstantMap(const hlsl::ShaderStage stage) + { + assert(isMutable()); + return const_cast(getSpecInfo(stage).entries); + } + // + inline IPipelineBase::SShaderSpecInfo getSpecInfo(const hlsl::ShaderStage stage) const { const auto stageIx = stageToIndex(stage); if (stageIx<0) return {}; return m_stages[stageIx].info; } - inline bool setSpecInfo(const IShader::SSpecInfo& info) + inline bool setSpecInfo(const IPipelineBase::SShaderSpecInfo& info) { assert(isMutable()); const int64_t specSize = info.valid(); if (specSize<0) return false; - const auto stage = info.shader->getStage(); - const auto stageIx = stageToIndex(stage); + const auto stageIx = stageToIndex(info.stage); if (stageIx<0) return false; auto& outStage = m_stages[stageIx]; outStage.info = info; - outStage.shader = core::smart_refctd_ptr(info.shader); + outStage.entryPoint = info.entryPoint; + 
outStage.shader = core::smart_refctd_ptr(const_cast(info.shader)); outStage.info.shader = outStage.shader.get(); auto& outEntries = outStage.entries; if (specSize>0) { - outEntries = std::make_unique(); + outEntries = std::make_unique(); outEntries->reserve(info.entries->size()); std::copy(info.entries->begin(),info.entries->end(),std::insert_iterator(*outEntries,outEntries->begin())); } @@ -103,7 +112,7 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase outStage.info.entries = outEntries.get(); return true; } - inline bool clearStage(const ICPUShader::E_SHADER_STAGE stage) + inline bool clearStage(const hlsl::ShaderStage stage) { assert(isMutable()); const auto stageIx = stageToIndex(stage); @@ -118,12 +127,14 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase virtual ~ICPUPipeline() = default; virtual this_t* clone_impl(core::smart_refctd_ptr&& layout) const = 0; - virtual int8_t stageToIndex(const ICPUShader::E_SHADER_STAGE stage) const = 0; + virtual int8_t stageToIndex(const hlsl::ShaderStage stage) const = 0; - struct ShaderStage { - core::smart_refctd_ptr shader = {}; - std::unique_ptr entries = {}; - ICPUShader::SSpecInfo info = {}; + struct ShaderStage + { + std::string entryPoint = {}; + core::smart_refctd_ptr shader = {}; + std::unique_ptr entries = {}; + IPipelineBase::SShaderSpecInfo info = {}; } m_stages[MaxShaderStageCount] = {}; }; diff --git a/include/nbl/asset/ICPURenderpassIndependentPipeline.h b/include/nbl/asset/ICPURenderpassIndependentPipeline.h index 86bdede894..ed0171d11f 100644 --- a/include/nbl/asset/ICPURenderpassIndependentPipeline.h +++ b/include/nbl/asset/ICPURenderpassIndependentPipeline.h @@ -6,7 +6,7 @@ #include "nbl/asset/IRenderpassIndependentPipeline.h" #include "nbl/asset/ICPUPipelineLayout.h" -#include "nbl/asset/ICPUShader.h" +#include "nbl/asset/IShader.h" namespace nbl::asset { @@ -16,10 +16,8 @@ namespace nbl::asset @see IRenderpassIndependentPipeline */ -class 
ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline, public IAsset +class ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline, public IAsset { - using base_t = IRenderpassIndependentPipeline; - public: //(TODO) it is true however it causes DSs to not be cached when ECF_DONT_CACHE_TOP_LEVEL is set which isnt really intuitive constexpr static inline uint32_t DESC_SET_HIERARCHYLEVELS_BELOW = 0u; @@ -38,9 +36,11 @@ class ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline< if (!_layout || params.shaders.empty()) return nullptr; auto retval = new ICPURenderpassIndependentPipeline(std::move(_layout),params.cached); +#if 0 for (const auto spec : params.shaders) if (spec.shader) retval->setSpecInfo(spec); +#endif return core::smart_refctd_ptr(retval,core::dont_grab); } @@ -54,9 +54,11 @@ class ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline< layout = m_layout; auto cp = new ICPURenderpassIndependentPipeline(std::move(layout),m_cachedParams); +#if 0 for (const auto spec : m_infos) if (spec.shader) cp->setSpecInfo(spec); +#endif return core::smart_refctd_ptr(cp,core::dont_grab); } @@ -67,7 +69,7 @@ class ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline< inline size_t getDependantCount() const override {return 0;} // - inline const SCachedCreationParams& getCachedCreationParams() const {return base_t::getCachedCreationParams();} + inline const SCachedCreationParams& getCachedCreationParams() const {return IRenderpassIndependentPipeline::getCachedCreationParams();} inline SCachedCreationParams& getCachedCreationParams() { assert(isMutable()); @@ -87,8 +89,9 @@ class ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline< m_layout = std::move(_layout); } +#if 0 // The getters are weird because the shader pointer needs patching - inline IShader::SSpecInfo getSpecInfo(const ICPUShader::E_SHADER_STAGE stage) + inline IShader::SSpecInfo 
getSpecInfo(const hlsl::ShaderStage stage) { assert(isMutable()); const auto stageIx = hlsl::findLSB(stage); @@ -96,7 +99,7 @@ class ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline< return {}; return m_infos[stageIx]; } - inline IShader::SSpecInfo getSpecInfo(const ICPUShader::E_SHADER_STAGE stage) const + inline IShader::SSpecInfo getSpecInfo(const hlsl::ShaderStage stage) const { const auto stageIx = hlsl::findLSB(stage); if (stageIx<0 || stageIx>=GRAPHICS_SHADER_STAGE_COUNT || hlsl::bitCount(stage)!=1) @@ -127,18 +130,21 @@ class ICPURenderpassIndependentPipeline : public IRenderpassIndependentPipeline< m_infos[stageIx].entries = m_entries[stageIx].get(); return true; } +#endif protected: - ICPURenderpassIndependentPipeline(core::smart_refctd_ptr&& _layout, const base_t::SCachedCreationParams& params) - : base_t(params), m_layout(std::move(_layout)) {} + ICPURenderpassIndependentPipeline(core::smart_refctd_ptr&& _layout, const IRenderpassIndependentPipeline::SCachedCreationParams& params) + : IRenderpassIndependentPipeline(params), m_layout(std::move(_layout)) {} virtual ~ICPURenderpassIndependentPipeline() = default; inline IAsset* getDependant_impl(const size_t ix) override {return nullptr;} core::smart_refctd_ptr m_layout; - std::array,GRAPHICS_SHADER_STAGE_COUNT> m_shaders = {}; - std::array,GRAPHICS_SHADER_STAGE_COUNT> m_entries = {}; - std::array m_infos = {}; +#if 0 + std::array,GRAPHICS_SHADER_STAGE_COUNT> m_shaders = {}; + std::array,GRAPHICS_SHADER_STAGE_COUNT> m_entries = {}; + std::array m_infos = {}; +#endif }; } diff --git a/include/nbl/asset/ICPUShader.h b/include/nbl/asset/ICPUShader.h deleted file mode 100644 index ae8758d44f..0000000000 --- a/include/nbl/asset/ICPUShader.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_I_CPU_SHADER_H_INCLUDED_ -#define _NBL_ASSET_I_CPU_SHADER_H_INCLUDED_ - -#include -#include - - -#include "nbl/asset/IAsset.h" -#include "nbl/asset/ICPUBuffer.h" -#include "nbl/asset/IShader.h" - -namespace nbl::asset -{ - -//! CPU Version of Unspecialized Shader -/* - @see IShader - @see IAsset -*/ - -class ICPUShader : public IAsset, public IShader -{ - protected: - virtual ~ICPUShader() = default; - - public: - using SSpecInfo = IShader::SSpecInfo; - - ICPUShader(core::smart_refctd_ptr&& code, const E_SHADER_STAGE stage, E_CONTENT_TYPE contentType, std::string&& filepathHint) - : IShader(stage, std::move(filepathHint)), m_code(std::move(code)), m_contentType(contentType) {} - - ICPUShader(const char* code, const E_SHADER_STAGE stage, const E_CONTENT_TYPE contentType, std::string&& filepathHint) - : ICPUShader(ICPUBuffer::create({ strlen(code) + 1u }), stage, contentType, std::move(filepathHint)) - { - assert(contentType != E_CONTENT_TYPE::ECT_SPIRV); // because using strlen needs `code` to be null-terminated - memcpy(m_code->getPointer(), code, m_code->getSize()); - } - - constexpr static inline auto AssetType = ET_SHADER; - inline E_TYPE getAssetType() const override { return AssetType; } - - inline core::smart_refctd_ptr clone(uint32_t _depth = ~0u) const override - { - auto buf = (_depth > 0u && m_code) ? core::smart_refctd_ptr_static_cast(m_code->clone(_depth-1u)) : m_code; - return core::smart_refctd_ptr(new ICPUShader(std::move(buf), getStage(), m_contentType, std::string(getFilepathHint())), core::dont_grab); - } - - //! 
- inline size_t getDependantCount() const override {return 1;} - - const ICPUBuffer* getContent() const { return m_code.get(); }; - - inline E_CONTENT_TYPE getContentType() const { return m_contentType; } - - inline bool isContentHighLevelLanguage() const - { - return (m_contentType == E_CONTENT_TYPE::ECT_GLSL || m_contentType == E_CONTENT_TYPE::ECT_HLSL); - } - - bool setShaderStage(const E_SHADER_STAGE stage) - { - if(!isMutable()) - return m_shaderStage == stage; - m_shaderStage = stage; - return true; - } - - bool setFilePathHint(std::string&& filepathHint) - { - if(!isMutable()) - return false; - m_filepathHint = std::move(filepathHint); - return true; - } - - protected: - inline IAsset* getDependant_impl(const size_t ix) override {return m_code.get();} - - const core::smart_refctd_ptr m_code; - const E_CONTENT_TYPE m_contentType; -}; - -} -#endif diff --git a/include/nbl/asset/IDescriptorSetLayout.h b/include/nbl/asset/IDescriptorSetLayout.h index f65d01ddc5..140b8d7485 100644 --- a/include/nbl/asset/IDescriptorSetLayout.h +++ b/include/nbl/asset/IDescriptorSetLayout.h @@ -93,7 +93,7 @@ class IDescriptorSetLayoutBase : public virtual core::IReferenceCounted // TODO: return m_bindingNumbers[index.data]; } - inline core::bitflag getStageFlags(const storage_range_index_t index) const + inline core::bitflag getStageFlags(const storage_range_index_t index) const { assert(index.data < m_count); return m_stageFlags[index.data]; @@ -120,11 +120,11 @@ class IDescriptorSetLayoutBase : public virtual core::IReferenceCounted // TODO: // The following are merely convenience functions for one off use. // If you already have an index (the result of `findBindingStorageIndex`) lying around use the above functions for quick lookups, and to avoid unnecessary binary searches. 
- inline core::bitflag getStageFlags(const binding_number_t binding) const + inline core::bitflag getStageFlags(const binding_number_t binding) const { const auto index = findBindingStorageIndex(binding); if (!index) - return IShader::E_SHADER_STAGE::ESS_UNKNOWN; + return hlsl::ShaderStage::ESS_UNKNOWN; return getStageFlags(index); } @@ -169,7 +169,7 @@ class IDescriptorSetLayoutBase : public virtual core::IReferenceCounted // TODO: { uint32_t binding; core::bitflag createFlags; - core::bitflag stageFlags; + core::bitflag stageFlags; uint32_t count; inline bool operator< (const SBuildInfo& other) const { return binding < other.binding; } @@ -212,13 +212,13 @@ class IDescriptorSetLayoutBase : public virtual core::IReferenceCounted // TODO: offset += m_count * sizeof(binding_number_t); assert(core::is_aligned_ptr(m_bindingNumbers)); - assert(alignof(core::bitflag) <= alignof(decltype(m_bindingNumbers[0]))); + assert(alignof(core::bitflag) <= alignof(decltype(m_bindingNumbers[0]))); - m_stageFlags = reinterpret_cast*>(m_data.get() + offset); - offset += m_count * sizeof(core::bitflag); + m_stageFlags = reinterpret_cast*>(m_data.get() + offset); + offset += m_count * sizeof(core::bitflag); assert(core::is_aligned_ptr(m_stageFlags)); - assert(alignof(core::bitflag) >= alignof(storage_offset_t)); + assert(alignof(core::bitflag) >= alignof(storage_offset_t)); m_storageOffsets = reinterpret_cast(m_data.get() + offset); offset += m_count * sizeof(storage_offset_t); @@ -237,7 +237,7 @@ class IDescriptorSetLayoutBase : public virtual core::IReferenceCounted // TODO: const size_t result = m_count * ( sizeof(binding_number_t) + sizeof(core::bitflag) + - sizeof(core::bitflag) + + sizeof(core::bitflag) + sizeof(storage_offset_t)); return result; } @@ -261,7 +261,7 @@ class IDescriptorSetLayoutBase : public virtual core::IReferenceCounted // TODO: binding_number_t* m_bindingNumbers = nullptr; core::bitflag* m_createFlags = nullptr; - core::bitflag* m_stageFlags = nullptr; + 
core::bitflag* m_stageFlags = nullptr; storage_offset_t* m_storageOffsets = nullptr; std::unique_ptr m_data = nullptr; @@ -325,7 +325,7 @@ class IDescriptorSetLayout : public IDescriptorSetLayoutBase uint32_t binding; IDescriptor::E_TYPE type; core::bitflag createFlags; - core::bitflag stageFlags; + core::bitflag stageFlags; uint32_t count; // Use this if you want immutable samplers that are baked into the DS layout itself. // If it's `nullptr` then the samplers used are mutable and can be specified while writing the image descriptor to a binding while updating the DS. diff --git a/include/nbl/asset/IGraphicsPipeline.h b/include/nbl/asset/IGraphicsPipeline.h index 62861fdc9d..c59ad51ca9 100644 --- a/include/nbl/asset/IGraphicsPipeline.h +++ b/include/nbl/asset/IGraphicsPipeline.h @@ -81,7 +81,7 @@ class IGraphicsPipelineBase : public virtual core::IReferenceCounted }; }; -template +template class IGraphicsPipeline : public IPipeline, public IGraphicsPipelineBase { protected: @@ -91,7 +91,7 @@ class IGraphicsPipeline : public IPipeline, public IGraphics struct SCreationParams : IPipeline::SCreationParams { protected: - using SpecInfo = ShaderType::SSpecInfo; + using SpecInfo = IPipelineBase::SShaderSpecInfo; template inline bool impl_valid(ExtraLambda&& extra) const { @@ -105,29 +105,29 @@ class IGraphicsPipeline : public IPipeline, public IGraphics // TODO: check rasterization samples, etc. 
//rp->getCreationParameters().subpasses[i] - core::bitflag stagePresence = {}; + core::bitflag stagePresence = {}; for (const auto info : shaders) if (info.shader) { if (!extra(info)) return false; - const auto stage = info.shader->getStage(); - if (stage>ICPUShader::E_SHADER_STAGE::ESS_FRAGMENT) + const auto stage = info.stage; + if (stage>hlsl::ShaderStage::ESS_FRAGMENT) return false; if (stagePresence.hasFlags(stage)) return false; stagePresence |= stage; } // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096 - if (!stagePresence.hasFlags(ICPUShader::E_SHADER_STAGE::ESS_VERTEX)) + if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_VERTEX)) return false; // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pStages-00729 // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pStages-00730 - if (stagePresence.hasFlags(ICPUShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL)!=stagePresence.hasFlags(ICPUShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION)) + if (stagePresence.hasFlags(hlsl::ShaderStage::ESS_TESSELLATION_CONTROL)!=stagePresence.hasFlags(hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION)) return false; // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pStages-08888 // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-topology-08889 - if (stagePresence.hasFlags(ICPUShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION)!=(cached.primitiveAssembly.primitiveType==EPT_PATCH_LIST)) + if (stagePresence.hasFlags(hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION)!=(cached.primitiveAssembly.primitiveType==EPT_PATCH_LIST)) return false; return true; 
diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index bd7035158e..036a684729 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -6,6 +6,7 @@ #include "nbl/asset/IPipelineLayout.h" +#include "nbl/asset/IShader.h" namespace nbl::asset @@ -24,19 +25,11 @@ namespace nbl::asset - compute pipeline - TODO: Raytracing */ -template -class IPipeline +class IPipelineBase { public: - // For now, due to API design we implicitly satisfy: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-08771 - // to: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pSpecializationInfo-06849 struct SCreationParams { - public: - const PipelineLayout* layout = nullptr; - protected: // This is not public to make sure that different pipelines only get the enums they support enum class FLAGS : uint64_t @@ -115,6 +108,160 @@ class IPipeline //PROTECTED_ACCESS_ONLY=1<<30, }; }; + + /* + Specialization info contains things such as entry point to a shader, + specialization map entry, required subgroup size, etc. for a blob of SPIR-V + + It also handles Specialization Constants. + + In Vulkan, all shaders get halfway-compiled into SPIR-V and + then then lowered (compiled) into the HW ISA by the Vulkan driver. + Normally, the half-way compile folds all constant values + and optimizes the code that uses them. + + But, it would be nice every so often to have your Vulkan + program sneak into the halfway-compiled SPIR-V binary and + manipulate some constants at runtime. This is what + Specialization Constants are for. + + So A Specialization Constant is a way of injecting an integer + constant into a halfway-compiled version of a shader right + before the lowering and linking when creating a pipeline. 
+ + Without Specialization Constants, you would have to commit + to a final value before the SPIR-V compilation + */ + struct SShaderSpecInfo final + { + //! Structure specifying a specialization map entry + /* + Note that if specialization constant ID is used + in a shader, \bsize\b and \boffset'b must match + to \isuch an ID\i accordingly. + + By design the API satisfies: + https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkSpecializationInfo.html#VUID-VkSpecializationInfo-offset-00773 + https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkSpecializationInfo.html#VUID-VkSpecializationInfo-pMapEntries-00774 + */ + //!< The ID of the specialization constant in SPIR-V. If it isn't used in the shader, the map entry does not affect the behavior of the pipeline. + using spec_constant_id_t = uint32_t; + struct SSpecConstantValue + { + const void* data = nullptr; + //!< The byte size of the specialization constant value within the supplied data buffer. + uint32_t size = 0; + + inline operator bool() const {return data&&size;} + + auto operator<=>(const SSpecConstantValue&) const = default; + }; + inline SSpecConstantValue getSpecializationByteValue(const spec_constant_id_t _specConstID) const + { + if (!entries) + return { nullptr,0u }; + + const auto found = entries->find(_specConstID); + if (found != entries->end() && bool(found->second)) + return found->second; + else + return { nullptr,0u }; + } + + // Nabla requires device's reported subgroup size to be between 4 and 128 + enum class SUBGROUP_SIZE : uint8_t + { + // No constraint but probably means `gl_SubgroupSize` is Dynamically Uniform + UNKNOWN = 0, + // Allows the Subgroup Uniform `gl_SubgroupSize` to be non-Dynamically Uniform and vary between Device's min and max + VARYING = 1, + // The rest we encode as log2(x) of the required value + REQUIRE_4 = 2, + REQUIRE_8 = 3, + REQUIRE_16 = 4, + REQUIRE_32 = 5, + REQUIRE_64 = 6, + REQUIRE_128 = 7 + }; + + // + static constexpr int32_t 
INVALID_SPEC_INFO = -1; + // Returns negative on failure, otherwise the size of the buffer required to reserve for the spec constant data + inline int32_t valid() const + { + if (!shader || hlsl::bitCount(stage)!=1) + return INVALID_SPEC_INFO; + + // Impossible to check: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pName-00707 + if (entryPoint.empty()) + return INVALID_SPEC_INFO; + + // Shader stages already checked for validity w.r.t. features enabled, during unspec shader creation, only check: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-flags-08988 + if (requireFullSubgroups) + switch (stage) + { + case hlsl::ShaderStage::ESS_COMPUTE: [[fallthrough]]; + case hlsl::ShaderStage::ESS_TASK: [[fallthrough]]; + case hlsl::ShaderStage::ESS_MESH: + break; + default: + return INVALID_SPEC_INFO; + break; + } + // Impossible to efficiently check anything from: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-maxClipDistances-00708 + // to: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-06686 + // and from: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02756 + // to: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-module-08987 + + int64_t specData = 0; + if (entries) + for (const auto& entry : *entries) + { + if (!entry.second) + return INVALID_SPEC_INFO; + specData += entry.second.size; + } + if (specData>0x7fffffff) + return INVALID_SPEC_INFO; + return static_cast(specData); + } + + 
using spec_constant_map_t = core::unordered_map; + + const IShader* shader = nullptr; + // A name of the function where the entry point of an shader executable begins. It's often "main" function. + std::string_view entryPoint = {}; + // stage must be set + hlsl::ShaderStage stage = hlsl::ShaderStage::ESS_UNKNOWN; + // there's some padding here + SUBGROUP_SIZE requiredSubgroupSize : 3 = SUBGROUP_SIZE::UNKNOWN; //!< Default value of 8 means no requirement + // Valid only for Compute, Mesh and Task shaders + uint8_t requireFullSubgroups : 1 = false; + // Container choice implicitly satisfies: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkSpecializationInfo.html#VUID-VkSpecializationInfo-constantID-04911 + const spec_constant_map_t* entries = nullptr; + // By requiring Nabla Core Profile features we implicitly satisfy: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-flags-02784 + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-flags-02785 + // Also because our API is sane, it satisfies the following by construction: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02754 + }; +}; +template +class IPipeline : public IPipelineBase +{ + public: + // For now, due to API design we implicitly satisfy a bunch of VUIDs + struct SCreationParams : protected IPipelineBase::SCreationParams + { + public: + const PipelineLayout* layout = nullptr; + }; inline const PipelineLayout* getLayout() const {return m_layout.get();} diff --git a/include/nbl/asset/IPipelineLayout.h b/include/nbl/asset/IPipelineLayout.h index 0eaba46f7c..430c812dcb 100644 --- a/include/nbl/asset/IPipelineLayout.h +++ b/include/nbl/asset/IPipelineLayout.h @@ -35,7 +35,7 @@ namespace nbl::asset 
struct SPushConstantRange { - IShader::E_SHADER_STAGE stageFlags; + hlsl::ShaderStage stageFlags; uint32_t offset; uint32_t size; @@ -148,7 +148,7 @@ class IPipelineLayout using type_bitset_t = std::bitset(IDescriptor::E_TYPE::ET_COUNT)>; hlsl::SBindingInfo binding = {}; - core::bitflag requiredStages = IShader::E_SHADER_STAGE::ESS_UNKNOWN; + core::bitflag requiredStages = hlsl::ShaderStage::ESS_UNKNOWN; // could have just initialized with `~type_bitset_t()` in C++23 type_bitset_t allowedTypes = type_bitset_t((0x1u<(IDescriptor::E_TYPE::ET_COUNT))-1); }; diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index e531b034e1..0bc2d68653 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -50,7 +50,7 @@ class IRayTracingPipelineBase : public virtual core::IReferenceCounted }; }; -template +template class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase { using base_creation_params_t = IPipeline::SCreationParams; @@ -81,7 +81,7 @@ class IRayTracingPipeline : public IPipeline, public IRayTra #undef base_flag protected: - using SpecInfo = ShaderType::SSpecInfo; + using SpecInfo = IPipelineBase::SShaderSpecInfo; template inline bool impl_valid(ExtraLambda&& extra) const { @@ -94,10 +94,10 @@ class IRayTracingPipeline : public IPipeline, public IRayTra { if (!extra(info)) return false; - const auto stage = info.shader->getStage(); - if ((stage & ~ICPUShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING) != 0) + const auto stage = info.stage; + if ((stage & ~IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING) != 0) return false; - if (!std::has_single_bit>(stage)) + if (!std::has_single_bit>(stage)) return false; } else @@ -107,12 +107,12 @@ class IRayTracingPipeline : public IPipeline, public IRayTra } } - auto getShaderStage = [this](size_t index) -> ICPUShader::E_SHADER_STAGE + auto getShaderStage = [this](size_t index) -> IShader::E_SHADER_STAGE { - return 
shaders[index].shader->getStage(); + return shaders[index].stage; }; - auto isValidShaderIndex = [this, getShaderStage](size_t index, ICPUShader::E_SHADER_STAGE expectedStage, bool is_unused_shader_forbidden) -> bool + auto isValidShaderIndex = [this, getShaderStage](size_t index, IShader::E_SHADER_STAGE expectedStage, bool is_unused_shader_forbidden) -> bool { if (index == SShaderGroupsParams::SIndex::Unused) return !is_unused_shader_forbidden; @@ -123,7 +123,7 @@ class IRayTracingPipeline : public IPipeline, public IRayTra return true; }; - if (!isValidShaderIndex(shaderGroups.raygen.index, ICPUShader::E_SHADER_STAGE::ESS_RAYGEN, true)) + if (!isValidShaderIndex(shaderGroups.raygen.index, IShader::E_SHADER_STAGE::ESS_RAYGEN, true)) { return false; } @@ -132,18 +132,18 @@ class IRayTracingPipeline : public IPipeline, public IRayTra { // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03470 if (!isValidShaderIndex(shaderGroup.anyHit, - ICPUShader::E_SHADER_STAGE::ESS_ANY_HIT, + IShader::E_SHADER_STAGE::ESS_ANY_HIT, bool(flags & FLAGS::NO_NULL_ANY_HIT_SHADERS))) return false; // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03471 if (!isValidShaderIndex(shaderGroup.closestHit, - ICPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT, + IShader::E_SHADER_STAGE::ESS_CLOSEST_HIT, bool(flags & FLAGS::NO_NULL_CLOSEST_HIT_SHADERS))) return false; if (!isValidShaderIndex(shaderGroup.intersection, - ICPUShader::E_SHADER_STAGE::ESS_INTERSECTION, + IShader::E_SHADER_STAGE::ESS_INTERSECTION, false)) return false; } @@ -151,14 +151,14 @@ class IRayTracingPipeline : public IPipeline, public IRayTra for (const auto& shaderGroup : shaderGroups.misses) { if (!isValidShaderIndex(shaderGroup.index, - ICPUShader::E_SHADER_STAGE::ESS_MISS, + IShader::E_SHADER_STAGE::ESS_MISS, false)) return false; } for (const auto& shaderGroup : shaderGroups.callables) { - if 
(!isValidShaderIndex(shaderGroup.index, ICPUShader::E_SHADER_STAGE::ESS_CALLABLE, false)) + if (!isValidShaderIndex(shaderGroup.index, IShader::E_SHADER_STAGE::ESS_CALLABLE, false)) return false; } return true; diff --git a/include/nbl/asset/IRenderpassIndependentPipeline.h b/include/nbl/asset/IRenderpassIndependentPipeline.h index 542bac60c7..7f33b6abc4 100644 --- a/include/nbl/asset/IRenderpassIndependentPipeline.h +++ b/include/nbl/asset/IRenderpassIndependentPipeline.h @@ -18,7 +18,6 @@ namespace nbl::asset { //! Deprecated class but needs to stay around till Material Compiler 2 -template class IRenderpassIndependentPipeline { public: @@ -31,8 +30,7 @@ class IRenderpassIndependentPipeline }; struct SCreationParams { - using SpecInfo = ShaderType::SSpecInfo; - std::span shaders = {}; + std::span shaders = {}; SCachedCreationParams cached = {}; }; diff --git a/include/nbl/asset/IShader.h b/include/nbl/asset/IShader.h index 8332f533c6..a6dab09b54 100644 --- a/include/nbl/asset/IShader.h +++ b/include/nbl/asset/IShader.h @@ -24,24 +24,12 @@ namespace nbl::asset //! Interface class for Unspecialized Shaders /* - The purpose for the class is for storing raw HLSL code - to be compiled or already compiled (but unspecialized) - SPIR-V code. + The purpose for the class is for storing raw HLSL code to be compiled + or already compiled (but unspecialized) SPIR-V code. */ - -class IShader : public virtual core::IReferenceCounted // TODO: do we need this inheritance? 
+class IShader : public IAsset { public: - using E_SHADER_STAGE = nbl::hlsl::ShaderStage; - - IShader(const E_SHADER_STAGE shaderStage, std::string&& filepathHint) - : m_shaderStage(shaderStage), m_filepathHint(std::move(filepathHint)) {} - - inline E_SHADER_STAGE getStage() const { return m_shaderStage; } - - inline const std::string& getFilepathHint() const { return m_filepathHint; } - - enum class E_CONTENT_TYPE : uint8_t { ECT_UNKNOWN = 0, @@ -49,191 +37,70 @@ class IShader : public virtual core::IReferenceCounted // TODO: do we need this ECT_HLSL, ECT_SPIRV, }; - - struct SSpecInfoBase + // + inline IShader(core::smart_refctd_ptr&& code, const E_CONTENT_TYPE contentType, std::string&& filepathHint) : + m_filepathHint(std::move(filepathHint)), m_code(std::move(code)), m_contentType(contentType) {} + inline IShader(const char* code, const E_CONTENT_TYPE contentType, std::string&& filepathHint) : + m_filepathHint(std::move(filepathHint)), m_code(ICPUBuffer::create({strlen(code)+1u})), m_contentType(contentType) { - //! Structure specifying a specialization map entry - /* - Note that if specialization constant ID is used - in a shader, \bsize\b and \boffset'b must match - to \isuch an ID\i accordingly. - - By design the API satisfies: - https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkSpecializationInfo.html#VUID-VkSpecializationInfo-offset-00773 - https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkSpecializationInfo.html#VUID-VkSpecializationInfo-pMapEntries-00774 - */ - //!< The ID of the specialization constant in SPIR-V. If it isn't used in the shader, the map entry does not affect the behavior of the pipeline. - using spec_constant_id_t = uint32_t; - struct SSpecConstantValue - { - const void* data = nullptr; - //!< The byte size of the specialization constant value within the supplied data buffer. 
- uint32_t size = 0; - - inline operator bool() const {return data&&size;} - - auto operator<=>(const SSpecConstantValue&) const = default; - }; - // Nabla requires device's reported subgroup size to be between 4 and 128 - enum class SUBGROUP_SIZE : uint8_t - { - // No constraint but probably means `gl_SubgroupSize` is Dynamically Uniform - UNKNOWN = 0, - // Allows the Subgroup Uniform `gl_SubgroupSize` to be non-Dynamically Uniform and vary between Device's min and max - VARYING = 1, - // The rest we encode as log2(x) of the required value - REQUIRE_4 = 2, - REQUIRE_8 = 3, - REQUIRE_16 = 4, - REQUIRE_32 = 5, - REQUIRE_64 = 6, - REQUIRE_128 = 7 - }; - - using spec_constant_map_t = core::unordered_map; - }; - /* - Specialization info contains things such as entry point to a shader, - specialization map entry, required subgroup size, etc. for a blob of SPIR-V - - It also handles Specialization Constants. - - In Vulkan, all shaders get halfway-compiled into SPIR-V and - then then lowered (compiled) into the HW ISA by the Vulkan driver. - Normally, the half-way compile folds all constant values - and optimizes the code that uses them. - - But, it would be nice every so often to have your Vulkan - program sneak into the halfway-compiled SPIR-V binary and - manipulate some constants at runtime. This is what - Specialization Constants are for. - - So A Specialization Constant is a way of injecting an integer - constant into a halfway-compiled version of a shader right - before the lowering and linking when creating a pipeline. 
- - Without Specialization Constants, you would have to commit - to a final value before the SPIR-V compilation - */ - template - struct SSpecInfo final : SSpecInfoBase + assert(contentType!=E_CONTENT_TYPE::ECT_SPIRV); // because using strlen needs `code` to be null-terminated + memcpy(m_code->getPointer(),code,m_code->getSize()); + } + + constexpr static inline auto AssetType = ET_SHADER; + inline E_TYPE getAssetType() const override { return AssetType; } + + // + inline size_t getDependantCount() const override { return 1; } + + // + inline core::smart_refctd_ptr clone(uint32_t _depth=~0u) const override { - inline SSpecConstantValue getSpecializationByteValue(const spec_constant_id_t _specConstID) const - { - if (!entries) - return {nullptr,0u}; - - const auto found = entries->find(_specConstID); - if (found!=entries->end() && bool(found->second)) - return found->second; - else - return {nullptr,0u}; - } + auto buf = (_depth>0u && m_code) ? core::smart_refctd_ptr_static_cast(m_code->clone(_depth-1u)):m_code; + return core::make_smart_refctd_ptr(std::move(buf),m_contentType,std::string(m_filepathHint)); + } - // Returns negative on failure, otherwise the size of the buffer required to reserve for the spec constant data - inline int32_t valid() const - { - // Impossible to check: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pName-00707 - if (entryPoint.empty()) - return INVALID_SPEC_INFO; - - if (!shader) - return INVALID_SPEC_INFO; - const auto stage = shader->getStage(); - - // Shader stages already checked for validity w.r.t. 
features enabled, during unspec shader creation, only check: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-flags-08988 - if (requireFullSubgroups) - switch (stage) - { - case E_SHADER_STAGE::ESS_COMPUTE: [[fallthrough]]; - case E_SHADER_STAGE::ESS_TASK: [[fallthrough]]; - case E_SHADER_STAGE::ESS_MESH: - break; - default: - return INVALID_SPEC_INFO; - break; - } - // Impossible to efficiently check anything from: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-maxClipDistances-00708 - // to: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-06686 - // and from: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02756 - // to: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-module-08987 - - int64_t specData = 0; - if (entries) - for (const auto& entry : *entries) - { - if (!entry.second) - return INVALID_SPEC_INFO; - specData += entry.second.size; - } - if (specData>0x7fffffff) - return INVALID_SPEC_INFO; - return static_cast(specData); - } - inline bool equalAllButShader(const SSpecInfo& other) const + // The file path hint is extremely important for resolving includes if the content type is NOT SPIR-V + inline const std::string& getFilepathHint() const { return m_filepathHint; } + bool setFilePathHint(std::string&& filepathHint) + { + if(!isMutable()) + return false; + m_filepathHint = std::move(filepathHint); + return true; + } + + // + const ICPUBuffer* getContent() const { return m_code.get(); }; + + // + inline E_CONTENT_TYPE getContentType() const { return m_contentType; } +
inline bool isContentHighLevelLanguage() const + { + switch (m_contentType) { - if (entryPoint != other.entryPoint) - return false; - if ((!shader) != (!other.shader)) - return false; - if (requiredSubgroupSize != other.requiredSubgroupSize) - return false; - if (requireFullSubgroups != other.requireFullSubgroups) + case E_CONTENT_TYPE::ECT_SPIRV: return false; - - if (!entries) - return !other.entries; - if (entries->size()!=other.entries->size()) - return false; - for (const auto& entry : *other.entries) - { - const auto found = entries->find(entry.first); - if (found==entries->end()) - return false; - if (found->second!=entry.second) - return false; - } - - return true; + default: + break; } + return true; + } - inline operator SSpecInfo() const - { - return SSpecInfo{ - .entryPoint = entryPoint, - .shader = shader, - .entries = entries, - .requiredSubgroupSize = requiredSubgroupSize, - .requireFullSubgroups = requireFullSubgroups, - }; - } - - - std::string entryPoint = "main"; //!< A name of the function where the entry point of an shader executable begins. It's often "main" function. 
- ShaderType* shader = nullptr; - // Container choice implicitly satisfies: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkSpecializationInfo.html#VUID-VkSpecializationInfo-constantID-04911 - const spec_constant_map_t* entries = nullptr; - // By requiring Nabla Core Profile features we implicitly satisfy: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-flags-02784 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-flags-02785 - // Also because our API is sane, it satisfies the following by construction: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02754 - SUBGROUP_SIZE requiredSubgroupSize : 3 = SUBGROUP_SIZE::UNKNOWN; //!< Default value of 8 means no requirement - // Valid only for Compute, Mesh and Task shaders - uint8_t requireFullSubgroups : 1 = false; - static constexpr int32_t INVALID_SPEC_INFO = -1; - }; + // TODO: `void setContent(core::smart_refctd_ptr&&,const E_CONTENT_TYPE)` + + // alias for legacy reasons + using E_SHADER_STAGE = hlsl::ShaderStage; protected: - E_SHADER_STAGE m_shaderStage; + virtual ~IShader() = default; + + inline IAsset* getDependant_impl(const size_t ix) override {return m_code.get();} + std::string m_filepathHint; + core::smart_refctd_ptr m_code; + E_CONTENT_TYPE m_contentType; }; } diff --git a/include/nbl/asset/asset.h b/include/nbl/asset/asset.h index edf12af81d..84d9b9ccd2 100644 --- a/include/nbl/asset/asset.h +++ b/include/nbl/asset/asset.h @@ -40,7 +40,7 @@ #include "nbl/asset/ICPUAccelerationStructure.h" // shaders -#include "nbl/asset/ICPUShader.h" +#include "nbl/asset/IShader.h" #include "nbl/asset/utils/IShaderCompiler.h" #include "nbl/asset/utils/CGLSLCompiler.h" #include
"nbl/asset/utils/CSPIRVIntrospector.h" diff --git a/include/nbl/asset/metadata/CHLSLMetadata.h b/include/nbl/asset/metadata/CHLSLMetadata.h new file mode 100644 index 0000000000..92e46a5148 --- /dev/null +++ b/include/nbl/asset/metadata/CHLSLMetadata.h @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef __NBL_ASSET_C_HLSL_METADATA_H_INCLUDED__ +#define __NBL_ASSET_C_HLSL_METADATA_H_INCLUDED__ + +#include "nbl/asset/metadata/IAssetMetadata.h" +#include "nbl/builtin/hlsl/enums.hlsl" +#include <utility> + +namespace nbl +{ +namespace asset +{ + +// Asset metadata reported under the loader name "CHLSLLoader"; it carries an array of shader stages. +class CHLSLMetadata final : public IAssetMetadata +{ + public: + // The array is handed over by rvalue reference, so move it into the member instead of copying it. + explicit CHLSLMetadata(core::smart_refctd_dynamic_array&& shaderStages): shaderStages(std::move(shaderStages)) {} + + _NBL_STATIC_INLINE_CONSTEXPR const char* LoaderName = "CHLSLLoader"; + const char* getLoaderName() const override { return LoaderName; } + + core::smart_refctd_dynamic_array shaderStages; +}; + +} +} + +#endif diff --git a/include/nbl/asset/utils/CCompilerSet.h b/include/nbl/asset/utils/CCompilerSet.h index 3e5fd0d6ce..244f465f78 100644 --- a/include/nbl/asset/utils/CCompilerSet.h +++ b/include/nbl/asset/utils/CCompilerSet.h @@ -1,7 +1,6 @@ // Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h - #ifndef _NBL_ASSET_C_COMPILER_SET_H_INCLUDED_ #define _NBL_ASSET_C_COMPILER_SET_H_INCLUDED_ @@ -11,8 +10,8 @@ namespace nbl::asset { - class NBL_API2 CCompilerSet : public core::IReferenceCounted - { +class NBL_API2 CCompilerSet : public core::IReferenceCounted +{ public: CCompilerSet(core::smart_refctd_ptr&& sys) : @@ -22,34 +21,32 @@ namespace nbl::asset m_GLSLCompiler(core::make_smart_refctd_ptr(core::smart_refctd_ptr(sys))) {} - core::smart_refctd_ptr compileToSPIRV(const asset::ICPUShader* shader, const IShaderCompiler::SCompilerOptions& options) const; + core::smart_refctd_ptr compileToSPIRV(const asset::IShader* shader, const IShaderCompiler::SCompilerOptions& options) const; - core::smart_refctd_ptr preprocessShader(const asset::ICPUShader* shader, const IShaderCompiler::SPreprocessorOptions& preprocessOptions) const; + core::smart_refctd_ptr preprocessShader(const asset::IShader* shader, hlsl::ShaderStage& stage, const IShaderCompiler::SPreprocessorOptions& preprocessOptions) const; inline core::smart_refctd_ptr getShaderCompiler(IShader::E_CONTENT_TYPE contentType) const { - - if (contentType == IShader::E_CONTENT_TYPE::ECT_HLSL) { - + if (contentType==IShader::E_CONTENT_TYPE::ECT_HLSL) + { #ifdef _NBL_PLATFORM_WINDOWS_ return m_HLSLCompiler; #else return nullptr; #endif } - else if (contentType == IShader::E_CONTENT_TYPE::ECT_GLSL) + else if (contentType==IShader::E_CONTENT_TYPE::ECT_GLSL) return m_GLSLCompiler; else return nullptr; } protected: - #ifdef _NBL_PLATFORM_WINDOWS_ core::smart_refctd_ptr m_HLSLCompiler = nullptr; #endif core::smart_refctd_ptr m_GLSLCompiler = nullptr; - }; +}; } #endif diff --git a/include/nbl/asset/utils/CGLSLCompiler.h b/include/nbl/asset/utils/CGLSLCompiler.h index 95ac7c8498..ea8dd550bc 100644 --- a/include/nbl/asset/utils/CGLSLCompiler.h +++ b/include/nbl/asset/utils/CGLSLCompiler.h @@ -45,7 +45,7 @@ class NBL_API2 CGLSLCompiler final : public 
IShaderCompiler @returns Shader containing SPIR-V bytecode. */ - core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const override; + core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const override; /* If original code contains #version specifier, @@ -53,7 +53,7 @@ class NBL_API2 CGLSLCompiler final : public IShaderCompiler beginning of the output buffer. */ template - static core::smart_refctd_ptr createOverridenCopy(const ICPUShader* original, const char* fmt, Args... args) + static core::smart_refctd_ptr createOverridenCopy(const IShader* original, const char* fmt, Args... args) { uint32_t position = 0u; if (original != nullptr) diff --git a/include/nbl/asset/utils/CHLSLCompiler.h b/include/nbl/asset/utils/CHLSLCompiler.h index 1fca392c90..e5fca33815 100644 --- a/include/nbl/asset/utils/CHLSLCompiler.h +++ b/include/nbl/asset/utils/CHLSLCompiler.h @@ -36,10 +36,10 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler IShader::E_CONTENT_TYPE getCodeContentType() const override { return IShader::E_CONTENT_TYPE::ECT_HLSL; }; }; - core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies = nullptr) const override; + core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies = nullptr) const override; template - static core::smart_refctd_ptr createOverridenCopy(const ICPUShader* original, const char* fmt, Args... args) + static core::smart_refctd_ptr createOverridenCopy(const IShader* original, const char* fmt, Args... 
args) { return IShaderCompiler::createOverridenCopy(original, 0u, fmt, args...); } diff --git a/include/nbl/asset/utils/CSPIRVIntrospector.h b/include/nbl/asset/utils/CSPIRVIntrospector.h index f756a58a42..3d6455e020 100644 --- a/include/nbl/asset/utils/CSPIRVIntrospector.h +++ b/include/nbl/asset/utils/CSPIRVIntrospector.h @@ -12,7 +12,7 @@ #include #include -#include "nbl/asset/ICPUShader.h" +#include "nbl/asset/IShader.h" #include "nbl/asset/ICPUImageView.h" #include "nbl/asset/ICPUComputePipeline.h" #include "nbl/asset/ICPURenderpassIndependentPipeline.h" @@ -379,7 +379,8 @@ class NBL_API2 CSPIRVIntrospector : public core::Uncopyable struct SParams { std::string entryPoint; - core::smart_refctd_ptr shader; + core::smart_refctd_ptr shader; + hlsl::ShaderStage stage; bool operator==(const SParams& rhs) const { @@ -387,12 +388,12 @@ class NBL_API2 CSPIRVIntrospector : public core::Uncopyable return false; if (!rhs.shader) return false; - if (shader->getStage() != rhs.shader->getStage()) - return false; if (shader->getContentType() != rhs.shader->getContentType()) return false; if (shader->getContent()->getSize() != rhs.shader->getContent()->getSize()) return false; + if (stage != rhs.stage) + return false; return memcmp(shader->getContent()->getPointer(), rhs.shader->getContent()->getPointer(), shader->getContent()->getSize()) == 0; } }; @@ -571,17 +572,17 @@ class NBL_API2 CSPIRVIntrospector : public core::Uncopyable uint32_t count : 31 = 0; uint32_t isRuntimeSizedFlag : 1; // Which shader stages touch it - core::bitflag stageMask = ICPUShader::E_SHADER_STAGE::ESS_UNKNOWN; + core::bitflag stageMask = hlsl::ShaderStage::ESS_UNKNOWN; }; // inline CPipelineIntrospectionData() { - std::fill(m_pushConstantBytes.begin(),m_pushConstantBytes.end(),ICPUShader::E_SHADER_STAGE::ESS_UNKNOWN); + std::fill(m_pushConstantBytes.begin(),m_pushConstantBytes.end(),hlsl::ShaderStage::ESS_UNKNOWN); std::fill(m_highestBindingNumbers.begin(), m_highestBindingNumbers.end(), 
HighestBindingData()); } // returns true if successfully added all the info to self, false if incompatible with what's already in our pipeline or incomplete (e.g. missing spec constants) - bool merge(const CStageIntrospectionData* stageData, const ICPUShader::SSpecInfoBase::spec_constant_map_t* specConstants=nullptr); + bool merge(const CStageIntrospectionData* stageData, const IPipelineBase::SShaderSpecInfo::spec_constant_map_t* specConstants=nullptr); // core::smart_refctd_dynamic_array createPushConstantRangesFromIntrospection(core::smart_refctd_ptr& introspection); @@ -590,7 +591,7 @@ class NBL_API2 CSPIRVIntrospector : public core::Uncopyable protected: // ESS_UNKNOWN on a byte means its not declared in any shader merged so far - std::array,MaxPushConstantsSize> m_pushConstantBytes; + std::array,MaxPushConstantsSize> m_pushConstantBytes; // struct Hash { @@ -641,11 +642,10 @@ class NBL_API2 CSPIRVIntrospector : public core::Uncopyable return introspection; } - //! creates pipeline for a single ICPUShader - core::smart_refctd_ptr createApproximateComputePipelineFromIntrospection(const ICPUShader::SSpecInfo& info, core::smart_refctd_ptr&& layout = nullptr); + //! 
creates pipeline for a single IShader + core::smart_refctd_ptr createApproximateComputePipelineFromIntrospection(const IPipelineBase::SShaderSpecInfo& info, core::smart_refctd_ptr&& layout=nullptr); -#if 0 // wait until Renderpass Indep completely gone and Graphics Pipeline is used in a new way - core::smart_refctd_ptr createApproximateRenderpassIndependentPipelineFromIntrospection(const std::span _infos); +#if 0 // wait until Renderpass Indep completely gone and Graphics Pipeline is used in a new way && Graphics Pipeline Libraries struct CShaderStages { const CStageIntrospectionData* vertex = nullptr; @@ -675,7 +675,7 @@ class NBL_API2 CSPIRVIntrospector : public core::Uncopyable size_t hash = stringViewHasher(code); core::hash_combine(hash, std::string_view(params.entryPoint)); - core::hash_combine(hash, static_cast(params.shader->getStage())); + core::hash_combine(hash, static_cast(params.stage)); return hash; } diff --git a/include/nbl/asset/utils/ISPIRVDebloater.h b/include/nbl/asset/utils/ISPIRVDebloater.h new file mode 100644 index 0000000000..f5f87956be --- /dev/null +++ b/include/nbl/asset/utils/ISPIRVDebloater.h @@ -0,0 +1,79 @@ +#ifndef _NBL_ASSET_I_SPIRV_DEBLOATER_H_INCLUDED_ +#define _NBL_ASSET_I_SPIRV_DEBLOATER_H_INCLUDED_ + +#include "nbl/core/declarations.h" + +#include "nbl/asset/ICPUBuffer.h" + +#include "nbl/system/ILogger.h" + +namespace nbl::asset +{ + +class ISPIRVDebloater final : public core::IReferenceCounted +{ + public: + ISPIRVDebloater(); + + struct Result + { + core::smart_refctd_ptr spirv; // nullptr if there is some entry point not found or spirv does not need to be debloated + bool isSuccess; + + inline operator bool() const + { + return isSuccess; + } + }; + + struct EntryPoint + { + std::string_view name; + hlsl::ShaderStage stage; + + inline bool operator==(const EntryPoint& rhs) const + { + if (stage != rhs.stage) return false; + return name == rhs.name; + } + + inline auto operator<=>(const EntryPoint& other) const + { + if 
(auto cmp = stage <=> other.stage; cmp != 0) + return cmp; + return name <=> other.name; + } + }; + + Result debloat(const ICPUBuffer* spirvBuffer, const core::set& entryPoints, system::logger_opt_ptr logger = nullptr) const; + + inline core::smart_refctd_ptr debloat(const IShader* shader, const core::set& entryPoints, system::logger_opt_ptr logger = nullptr) const + { + if (shader->getContentType() != IShader::E_CONTENT_TYPE::ECT_SPIRV) + { + logger.log("shader content must be spirv!", system::ILogger::ELL_ERROR); + return nullptr; + } + const auto buffer = shader->getContent(); + const auto result = debloat(buffer, entryPoints, logger); + if (result && result.spirv.get() == nullptr) + { + // when debloat does not happen return original shader + return core::smart_refctd_ptr(shader); + } + + if (result.spirv.get() == nullptr) + { + return nullptr; + } + + return core::make_smart_refctd_ptr(core::smart_refctd_ptr(result.spirv), shader->getContentType(), std::string(shader->getFilepathHint())); + } + + private: + core::smart_refctd_ptr m_optimizer; +}; + +} + +#endif diff --git a/include/nbl/asset/utils/ISPIRVOptimizer.h b/include/nbl/asset/utils/ISPIRVOptimizer.h index 9d50cdbc0f..af59b8d7e2 100644 --- a/include/nbl/asset/utils/ISPIRVOptimizer.h +++ b/include/nbl/asset/utils/ISPIRVOptimizer.h @@ -18,6 +18,9 @@ class ISPIRVOptimizer final : public core::IReferenceCounted EOP_MERGE_RETURN, EOP_INLINE, EOP_ELIM_DEAD_FUNCTIONS, + EOP_ELIM_DEAD_VARIABLES, + EOP_ELIM_DEAD_CONSTANTS, + EOP_ELIM_DEAD_MEMBERS, EOP_SCALAR_REPLACEMENT, EOP_LOCAL_SINGLE_BLOCK_LOAD_STORE_ELIM, EOP_LOCAL_SINGLE_STORE_ELIM, @@ -34,7 +37,10 @@ class ISPIRVOptimizer final : public core::IReferenceCounted EOP_STRENGTH_REDUCTION, EOP_IF_CONVERSION, EOP_STRIP_DEBUG_INFO, + EOP_TRIM_CAPABILITIES, EOP_AGGRESSIVE_DCE, + EOP_REMOVE_UNUSED_INTERFACE_VARIABLES, + EOP_ELIMINATE_DEAD_INPUT_COMPONENTS_SAFE, EOP_COUNT }; diff --git a/include/nbl/asset/utils/IShaderCompiler.h 
b/include/nbl/asset/utils/IShaderCompiler.h index 447a1ed416..d4b8e50119 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -10,7 +10,7 @@ #include "nbl/system/IFile.h" #include "nbl/system/ISystem.h" -#include "nbl/asset/ICPUShader.h" +#include "nbl/asset/IShader.h" #include "nbl/asset/utils/ISPIRVOptimizer.h" // Less leakage than "nlohmann/json.hpp" only forward declarations @@ -144,7 +144,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // Forward declaration for SCompilerOptions use struct CCache; /* - @stage shaderStage + @stage shaderStage, can be ESS_ALL_OR_LIBRARY to make multi-entrypoint shaders @targetSpirvVersion spirv version @entryPoint entryPoint @outAssembly Optional parameter; if not nullptr, SPIR-V assembly is saved in there. @@ -168,7 +168,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted virtual IShader::E_CONTENT_TYPE getCodeContentType() const { return IShader::E_CONTENT_TYPE::ECT_UNKNOWN; }; - IShader::E_SHADER_STAGE stage = IShader::E_SHADER_STAGE::ESS_UNKNOWN; + IShader::E_SHADER_STAGE stage = IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY; E_SPIRV_VERSION targetSpirvVersion = E_SPIRV_VERSION::ESV_1_6; const ISPIRVOptimizer* spirvOptimizer = nullptr; core::bitflag debugInfoFlags = core::bitflag(E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT) | E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT; @@ -371,7 +371,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted bool setContent(const asset::ICPUBuffer* uncompressedSpirvBuffer); - core::smart_refctd_ptr decompressShader() const; + core::smart_refctd_ptr decompressShader() const; // TODO: make some of these private std::string mainFileContents; @@ -405,7 +405,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted return retVal; } - NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder) const; + NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const 
CIncludeFinder* finder) const; inline CCache() {} @@ -439,16 +439,16 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder) const; }; - core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const; + core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const; - inline core::smart_refctd_ptr compileToSPIRV(const char* code, const SCompilerOptions& options) const + inline core::smart_refctd_ptr compileToSPIRV(const char* code, const SCompilerOptions& options) const { if (!code) return nullptr; return compileToSPIRV({code,strlen(code)},options); } - inline core::smart_refctd_ptr compileToSPIRV(system::IFile* sourceFile, const SCompilerOptions& options) const + inline core::smart_refctd_ptr compileToSPIRV(system::IFile* sourceFile, const SCompilerOptions& options) const { size_t fileSize = sourceFile->getSize(); std::string code(fileSize,'\0'); @@ -463,7 +463,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted /** Resolves ALL #include directives regardless of any other preprocessor directive. - This is done in order to support `#include` AND simultaneulsy be able to store (serialize) such ICPUShader (mostly High Level source) into ONE file which, upon loading, will compile on every hardware/driver predicted by shader's author. + This is done in order to support `#include` AND simultaneously be able to store (serialize) such IShader (mostly High Level source) into ONE file which, upon loading, will compile on every hardware/driver predicted by shader's author. Internally function "disables" all preprocessor directives (so that they're not processed by preprocessor) except `#include` (and also `#version` and `#pragma shader_stage`). Note that among the directives there may be include guards. Because of that, maxSelfInclusionCount parameter is provided.
@@ -491,7 +491,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted If original == nullptr, the output buffer will only contain the data from fmt. */ template - static core::smart_refctd_ptr createOverridenCopy(const ICPUShader* original, uint32_t position, const char* fmt, Args... args) + static core::smart_refctd_ptr createOverridenCopy(const IShader* original, uint32_t position, const char* fmt, Args... args) { if (!original || !original->isContentHighLevelLanguage()) return nullptr; @@ -548,7 +548,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // terminating char *outCode = 0; outBuffer->setContentHash(outBuffer->computeContentHash()); - return nbl::core::make_smart_refctd_ptr(std::move(outBuffer), original->getStage(), original->getContentType(), std::string(original->getFilepathHint())); + return nbl::core::make_smart_refctd_ptr(std::move(outBuffer), original->getContentType(), std::string(original->getFilepathHint())); } else { @@ -567,7 +567,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted protected: virtual void insertIntoStart(std::string& code, std::ostringstream&& ins) const = 0; - virtual core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const SCompilerOptions& options, std::vector* dependencies) const = 0; + virtual core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const SCompilerOptions& options, std::vector* dependencies) const = 0; core::smart_refctd_ptr m_system; diff --git a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl index 8e8195ff2a..a48d9b4623 100644 --- a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl +++ b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl @@ -21,7 +21,7 @@ const static float32_t2 tc[3] = { [[vk::constant_id(0)]] const uint32_t SwapchainTransform = 0; - +[shader("vertex")] SVertexAttributes main() { 
using namespace ::nbl::hlsl::glsl; diff --git a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index 82019327cb..801b867766 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -90,6 +90,14 @@ struct blake3_hasher::update_impl update_impl>::__call(hasher,input); } }; +template +struct blake3_hasher::update_impl,Dummy> +{ + static inline void __call(blake3_hasher& hasher, const std::basic_string_view input) + { + hasher.update(input.data(),input.size()*sizeof(CharT)); + } +}; } diff --git a/include/nbl/ext/FFT/FFT.h b/include/nbl/ext/FFT/FFT.h deleted file mode 100644 index fbc6c127da..0000000000 --- a/include/nbl/ext/FFT/FFT.h +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_EXT_FFT_INCLUDED_ -#define _NBL_EXT_FFT_INCLUDED_ - -#include "nabla.h" -#include "nbl/video/IGPUShader.h" -#include "nbl/asset/ICPUShader.h" - - -namespace nbl -{ -namespace ext -{ -namespace FFT -{ - -typedef uint32_t uint; -struct alignas(16) uvec3 -{ - uint x,y,z; -}; -struct alignas(16) uvec4 { - uint x,y,z,w; -}; -#include "nbl/builtin/glsl/ext/FFT/parameters_struct.glsl"; - -class FFT final : public core::IReferenceCounted -{ - public: - struct Parameters_t alignas(16) : nbl_glsl_ext_FFT_Parameters_t - { - inline uint getLog2FFTSize() - { - return (input_dimensions.w>>3u)&0x1fu; - } - }; - - struct DispatchInfo_t - { - uint32_t workGroupCount[3]; - }; - - _NBL_STATIC_INLINE_CONSTEXPR uint32_t DEFAULT_WORK_GROUP_SIZE = 256u; - FFT(video::IDriver* driver, uint32_t maxDimensionSize, bool useHalfStorage = false); - - // returns how many dispatches necessary for computing the FFT and fills the uniform data - template - static inline uint32_t buildParameters( - bool isInverse, uint32_t numChannels, const asset::VkExtent3D& inputDimensions, - Parameters_t* outParams, 
DispatchInfo_t* outInfos, const asset::ISampler::E_TEXTURE_CLAMP* paddingType, - const asset::VkExtent3D& extraPaddedInputDimensions, bool realInput = false - ) - { - uint32_t passesRequired = 0u; - - const auto paddedInputDimensions = padDimensions(extraPaddedInputDimensions); - - using SizeAxisPair = std::tuple; - std::array passes; - if (numChannels) - { - for (uint32_t i=0u; i<3u; i++) - { - auto dim = (&paddedInputDimensions.width)[i]; - if (dim<2u) - continue; - passes[passesRequired++] = {float(dim)/float((&inputDimensions.width)[i]),i,paddingType[i]}; - } - if (unconstrainedAxisOrder) - std::sort(passes.begin(),passes.begin()+passesRequired); - } - - auto computeOutputStride = [](const uvec3& output_dimensions, const auto axis, const auto nextAxis) -> uvec4 - { - // coord[axis] = 1u - // coord[nextAxis] = fftLen; - // coord[otherAxis] = fftLen*dimension[nextAxis]; - uvec4 stride; - stride.w = output_dimensions.x*output_dimensions.y*output_dimensions.z; - for (auto i=0u; i<3u; i++) - { - auto& coord = (&stride.x)[i]; - if (i!=axis) - { - coord = (&output_dimensions.x)[axis]; - if (i!=nextAxis) - coord *= (&output_dimensions.x)[nextAxis]; - } - else - coord = 1u; - } - return stride; - }; - - if (passesRequired) - { - uvec3 output_dimensions = {inputDimensions.width,inputDimensions.height,inputDimensions.depth}; - for (uint32_t i=0u; i(passes[i]); - const auto paddedAxisLen = (&paddedInputDimensions.width)[passAxis]; - { - assert(paddingType[i]<=asset::ISampler::E_TEXTURE_CLAMP::ETC_MIRROR); - params.input_dimensions.w = (isInverse ? 
0x80000000u:0x0u)| - (passAxis<<28u)| // direction - ((numChannels-1u)<<26u)| // max channel - (hlsl::findMSB(paddedAxisLen)<<3u)| // log2(fftSize) - uint32_t(std::get<2u>(passes[i])); - } - - (&output_dimensions.x)[passAxis] = paddedAxisLen; - if (i) - params.input_strides = outParams[i-1u].output_strides; - else // TODO provide an override for input strides - { - params.input_strides.x = 1u; - params.input_strides.y = inputDimensions.width; - params.input_strides.z = params.input_strides.y * inputDimensions.height; - params.input_strides.w = params.input_strides.z * inputDimensions.depth; - } - params.output_strides = computeOutputStride(output_dimensions,passAxis,std::get<1u>(passes[(i+1u)%passesRequired])); - - auto& dispatch = outInfos[i]; - dispatch.workGroupCount[0] = output_dimensions.x; - dispatch.workGroupCount[1] = output_dimensions.y; - dispatch.workGroupCount[2] = output_dimensions.z; - dispatch.workGroupCount[passAxis] = 1u; - } - } - - return passesRequired; - } - static inline uint32_t buildParameters( - bool isInverse, uint32_t numChannels, const asset::VkExtent3D& inputDimensions, - Parameters_t* outParams, DispatchInfo_t* outInfos, const asset::ISampler::E_TEXTURE_CLAMP* paddingType - ) - { - return buildParameters(isInverse,numChannels,inputDimensions,outParams,outInfos,paddingType,inputDimensions); - } - - static inline asset::VkExtent3D padDimensions(asset::VkExtent3D dimension) - { - static_assert(core::isPoT(MINIMUM_FFT_SIZE),"MINIMUM_FFT_SIZE needs to be Power of Two!"); - for (auto i=0u; i<3u; i++) - { - auto& coord = (&dimension.width)[i]; - if (coord<=1u) - continue; - coord = core::max(core::roundUpToPoT(coord),MINIMUM_FFT_SIZE); - } - return dimension; - } - - // - static core::SRange getDefaultPushConstantRanges(); - - // - inline auto getDefaultDescriptorSetLayout() const {return m_dsLayout.get();} - - // - inline auto getDefaultPipelineLayout() const {return m_pplnLayout.get();} - - // - inline auto getDefaultPipeline() const 
{return m_ppln.get();} - - // - inline uint32_t getMaxFFTLength() const { return m_maxFFTLen; } - inline bool usesHalfFloatStorage() const { return m_halfFloatStorage; } - - // - static inline size_t getOutputBufferSize(bool _halfFloatStorage, const asset::VkExtent3D& inputDimensions, uint32_t numChannels, bool realInput=false) - { - size_t retval = getOutputBufferSize_impl(inputDimensions,numChannels); - if (!realInput) - retval <<= 1u; - return retval*(_halfFloatStorage ? sizeof(uint16_t):sizeof(uint32_t)); - } - inline size_t getOutputBufferSize(const asset::VkExtent3D& inputDimensions, uint32_t numChannels, bool realInput = false) - { - return getOutputBufferSize(m_halfFloatStorage,inputDimensions,numChannels,realInput); - } - - static void updateDescriptorSet( - video::IVideoDriver* driver, - video::IGPUDescriptorSet* set, - core::smart_refctd_ptr inputBufferDescriptor, - core::smart_refctd_ptr outputBufferDescriptor); - - static inline void dispatchHelper( - video::IVideoDriver* driver, - const video::IGPUPipelineLayout* pipelineLayout, - const Parameters_t& params, - const DispatchInfo_t& dispatchInfo, - bool issueDefaultBarrier=true) - { - driver->pushConstants(pipelineLayout,video::IGPUSpecializedShader::ESS_COMPUTE,0u,sizeof(Parameters_t),¶ms); - driver->dispatch(dispatchInfo.workGroupCount[0],dispatchInfo.workGroupCount[1],dispatchInfo.workGroupCount[2]); - - if (issueDefaultBarrier) - defaultBarrier(); - } - - static void defaultBarrier(); - - private: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t MINIMUM_FFT_SIZE = DEFAULT_WORK_GROUP_SIZE<<1u; - ~FFT() {} - - // - static inline size_t getOutputBufferSize_impl(const asset::VkExtent3D& inputDimensions, uint32_t numChannels) - { - const auto paddedInputDimensions = padDimensions(inputDimensions); - return paddedInputDimensions.width*paddedInputDimensions.height*paddedInputDimensions.depth*numChannels; - } - - core::smart_refctd_ptr m_dsLayout; - core::smart_refctd_ptr m_pplnLayout; - core::smart_refctd_ptr 
m_ppln; - uint32_t m_maxFFTLen; - bool m_halfFloatStorage; -}; - - -} -} -} - -#endif diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 458c928ce1..4e7147c904 100644 --- a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -10,7 +10,7 @@ namespace nbl::ext::FullScreenTriangle { struct ProtoPipeline final { - inline core::smart_refctd_ptr createDefaultVertexShader(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr) + inline core::smart_refctd_ptr createDefaultVertexShader(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr) { if (!assMan || !device) return nullptr; @@ -24,11 +24,11 @@ struct ProtoPipeline final if (assets.empty()) return nullptr; - auto source = IAsset::castDown(assets[0]); + auto source = IAsset::castDown(assets[0]); if (!source) return nullptr; - return device->createShader(source.get()); + return device->compileShader({ .source = source.get(), .stage = hlsl::ESS_VERTEX }); } public: @@ -40,7 +40,7 @@ struct ProtoPipeline final inline operator bool() const {return m_vxShader.get();} inline core::smart_refctd_ptr createPipeline( - const video::IGPUShader::SSpecInfo& fragShader, + const asset::IPipelineBase::SShaderSpecInfo& fragShader, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, const uint32_t subpassIx=0, @@ -58,11 +58,11 @@ struct ProtoPipeline final { const auto orientationAsUint32 = static_cast(swapchainTransform); - IGPUShader::SSpecInfo::spec_constant_map_t specConstants; + asset::IPipelineBase::SShaderSpecInfo::spec_constant_map_t specConstants; specConstants[0] = {.data=&orientationAsUint32,.size=sizeof(orientationAsUint32)}; - const IGPUShader::SSpecInfo shaders[2] = { - {.shader=m_vxShader.get(),.entries=&specConstants}, + const asset::IPipelineBase::SShaderSpecInfo shaders[2] = { 
+ {.shader=m_vxShader.get(), .entryPoint = "main" ,.stage = hlsl::ESS_VERTEX,.entries=&specConstants}, fragShader }; @@ -85,7 +85,7 @@ struct ProtoPipeline final } - core::smart_refctd_ptr m_vxShader; + core::smart_refctd_ptr m_vxShader; // The default is correct for us constexpr static inline asset::SRasterizationParams DefaultRasterParams = { .faceCullingMode = asset::EFCM_NONE, diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index c43f898264..4232860baa 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -446,25 +446,25 @@ inline VkAccessFlagBits2 getVkAccessFlagsFromAccessFlags(core::bitflag in) +inline VkShaderStageFlags getVkShaderStageFlagsFromShaderStage(const core::bitflag in) { VkShaderStageFlags ret = 0u; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_VERTEX)) ret |= VK_SHADER_STAGE_VERTEX_BIT; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL)) ret |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION)) ret |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_GEOMETRY)) ret |= VK_SHADER_STAGE_GEOMETRY_BIT; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_FRAGMENT)) ret |= VK_SHADER_STAGE_FRAGMENT_BIT; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_COMPUTE)) ret |= VK_SHADER_STAGE_COMPUTE_BIT; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_TASK)) ret |= VK_SHADER_STAGE_TASK_BIT_NV; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_MESH)) ret |= VK_SHADER_STAGE_MESH_BIT_NV; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_RAYGEN)) ret |= VK_SHADER_STAGE_RAYGEN_BIT_KHR; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_ANY_HIT)) ret |= VK_SHADER_STAGE_ANY_HIT_BIT_KHR; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT)) ret |= VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_MISS)) ret |= 
VK_SHADER_STAGE_MISS_BIT_KHR; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_INTERSECTION)) ret |= VK_SHADER_STAGE_INTERSECTION_BIT_KHR; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_CALLABLE)) ret |= VK_SHADER_STAGE_CALLABLE_BIT_KHR; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_ALL_GRAPHICS)) ret |= VK_SHADER_STAGE_ALL_GRAPHICS; - if(in.hasFlags(IGPUShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY)) ret |= VK_SHADER_STAGE_ALL; + if(in.hasFlags(hlsl::ShaderStage::ESS_VERTEX)) ret |= VK_SHADER_STAGE_VERTEX_BIT; + if(in.hasFlags(hlsl::ShaderStage::ESS_TESSELLATION_CONTROL)) ret |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + if(in.hasFlags(hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION)) ret |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + if(in.hasFlags(hlsl::ShaderStage::ESS_GEOMETRY)) ret |= VK_SHADER_STAGE_GEOMETRY_BIT; + if(in.hasFlags(hlsl::ShaderStage::ESS_FRAGMENT)) ret |= VK_SHADER_STAGE_FRAGMENT_BIT; + if(in.hasFlags(hlsl::ShaderStage::ESS_COMPUTE)) ret |= VK_SHADER_STAGE_COMPUTE_BIT; + if(in.hasFlags(hlsl::ShaderStage::ESS_TASK)) ret |= VK_SHADER_STAGE_TASK_BIT_NV; + if(in.hasFlags(hlsl::ShaderStage::ESS_MESH)) ret |= VK_SHADER_STAGE_MESH_BIT_NV; + if(in.hasFlags(hlsl::ShaderStage::ESS_RAYGEN)) ret |= VK_SHADER_STAGE_RAYGEN_BIT_KHR; + if(in.hasFlags(hlsl::ShaderStage::ESS_ANY_HIT)) ret |= VK_SHADER_STAGE_ANY_HIT_BIT_KHR; + if(in.hasFlags(hlsl::ShaderStage::ESS_CLOSEST_HIT)) ret |= VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + if(in.hasFlags(hlsl::ShaderStage::ESS_MISS)) ret |= VK_SHADER_STAGE_MISS_BIT_KHR; + if(in.hasFlags(hlsl::ShaderStage::ESS_INTERSECTION)) ret |= VK_SHADER_STAGE_INTERSECTION_BIT_KHR; + if(in.hasFlags(hlsl::ShaderStage::ESS_CALLABLE)) ret |= VK_SHADER_STAGE_CALLABLE_BIT_KHR; + if(in.hasFlags(hlsl::ShaderStage::ESS_ALL_GRAPHICS)) ret |= VK_SHADER_STAGE_ALL_GRAPHICS; + if(in.hasFlags(hlsl::ShaderStage::ESS_ALL_OR_LIBRARY)) ret |= VK_SHADER_STAGE_ALL; return ret; } diff --git a/include/nbl/video/CVulkanRayTracingPipeline.h 
b/include/nbl/video/CVulkanRayTracingPipeline.h index ca14d44ee9..82d8c777b6 100644 --- a/include/nbl/video/CVulkanRayTracingPipeline.h +++ b/include/nbl/video/CVulkanRayTracingPipeline.h @@ -4,16 +4,11 @@ #include "nbl/video/IGPURayTracingPipeline.h" -#include "nbl/video/CVulkanShader.h" - - namespace nbl::video { class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline { - using ShaderRef = core::smart_refctd_ptr; - using ShaderContainer = core::smart_refctd_dynamic_array; using GeneralGroupStackSizeContainer = core::smart_refctd_dynamic_array; using HitGroupStackSizeContainer = core::smart_refctd_dynamic_array; @@ -45,7 +40,6 @@ class CVulkanRayTracingPipeline final : public IGPURayTracingPipeline ~CVulkanRayTracingPipeline() override; const VkPipeline m_vkPipeline; - ShaderContainer m_shaders; ShaderGroupHandleContainer m_shaderGroupHandles; uint16_t m_raygenStackSize; core::smart_refctd_dynamic_array m_missStackSizes; diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index cfe0439cde..f79ed17a50 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -7,7 +7,6 @@ #include "nbl/builtin/hlsl/indirect_commands.hlsl" -#include "nbl/video/IGPUShader.h" #include "nbl/video/IGPUCommandPool.h" #include "nbl/video/IQueue.h" @@ -322,7 +321,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount=0u, const uint32_t* const dynamicOffsets=nullptr ); - bool pushConstants(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues); + bool pushConstants(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues); bool bindVertexBuffers(const uint32_t firstBinding, 
const uint32_t bindingCount, const asset::SBufferBinding* const pBindings); bool bindIndexBuffer(const asset::SBufferBinding& binding, const asset::E_INDEX_TYPE indexType); @@ -640,7 +639,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* const dynamicOffsets = nullptr ) = 0; - virtual bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) = 0; + virtual bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) = 0; virtual bool bindVertexBuffers_impl(const uint32_t firstBinding, const uint32_t bindingCount, const asset::SBufferBinding* const pBindings) = 0; virtual bool bindIndexBuffer_impl(const asset::SBufferBinding& binding, const asset::E_INDEX_TYPE indexType) = 0; diff --git a/include/nbl/video/IGPUComputePipeline.h b/include/nbl/video/IGPUComputePipeline.h index 4d0fbaa39f..49e44dfcc1 100644 --- a/include/nbl/video/IGPUComputePipeline.h +++ b/include/nbl/video/IGPUComputePipeline.h @@ -50,7 +50,7 @@ class IGPUComputePipeline : public IBackendObject, public asset::IPipelinegetStage()!=IGPUShader::E_SHADER_STAGE::ESS_COMPUTE) + if (!layout || shader.stage!=hlsl::ShaderStage::ESS_COMPUTE) return {}; uint32_t count = 0; @@ -63,11 +63,11 @@ class IGPUComputePipeline : public IBackendObject, public asset::IPipeline(dataSize)}; } - inline std::span getShaders() const {return {&shader,1}; } + inline std::span getShaders() const {return {&shader,1}; } // TODO: Could guess the required flags from SPIR-V introspection of declared caps core::bitflag flags = FLAGS::NONE; - IGPUShader::SSpecInfo shader = {}; + IPipelineBase::SShaderSpecInfo shader = {}; }; inline core::bitflag 
getCreationFlags() const {return m_flags;} diff --git a/include/nbl/video/IGPUGraphicsPipeline.h b/include/nbl/video/IGPUGraphicsPipeline.h index 5f4e61c0d9..8240bcea94 100644 --- a/include/nbl/video/IGPUGraphicsPipeline.h +++ b/include/nbl/video/IGPUGraphicsPipeline.h @@ -5,16 +5,15 @@ #include "nbl/asset/IGraphicsPipeline.h" #include "nbl/video/IGPUPipelineLayout.h" -#include "nbl/video/IGPUShader.h" #include "nbl/video/IGPURenderpass.h" namespace nbl::video { -class IGPUGraphicsPipeline : public IBackendObject, public asset::IGraphicsPipeline +class IGPUGraphicsPipeline : public IBackendObject, public asset::IGraphicsPipeline { - using pipeline_t = asset::IGraphicsPipeline; + using pipeline_t = asset::IGraphicsPipeline; public: struct SCreationParams final : pipeline_t::SCreationParams, SPipelineCreationParams @@ -37,7 +36,7 @@ class IGPUGraphicsPipeline : public IBackendObject, public asset::IGraphicsPipel if (!layout) return {}; SSpecializationValidationResult retval = {.count=0,.dataSize=0}; - const bool valid = pipeline_t::SCreationParams::impl_valid([&retval](const IGPUShader::SSpecInfo& info)->bool + const bool valid = pipeline_t::SCreationParams::impl_valid([&retval](const IPipelineBase::SShaderSpecInfo& info)->bool { const auto dataSize = info.valid(); if (dataSize<0) @@ -56,7 +55,7 @@ class IGPUGraphicsPipeline : public IBackendObject, public asset::IGraphicsPipel return retval; } - inline std::span getShaders() const {return shaders;} + inline std::span getShaders() const {return shaders;} // TODO: Could guess the required flags from SPIR-V introspection of declared caps core::bitflag flags = FLAGS::NONE; diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 2d0b8961f9..fb8c371193 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -10,9 +10,9 @@ namespace nbl::video { -class IGPURayTracingPipeline : public IBackendObject, public 
asset::IRayTracingPipeline +class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingPipeline { - using pipeline_t = asset::IRayTracingPipeline; + using pipeline_t = asset::IRayTracingPipeline; public: @@ -42,7 +42,7 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP .count=0, .dataSize=0, }; - const bool valid = pipeline_t::SCreationParams::impl_valid([&retval](const IGPUShader::SSpecInfo& info)->bool + const bool valid = pipeline_t::SCreationParams::impl_valid([&retval](const asset::IPipelineBase::SShaderSpecInfo& info)->bool { const auto dataSize = info.valid(); if (dataSize<0) @@ -61,7 +61,7 @@ class IGPURayTracingPipeline : public IBackendObject, public asset::IRayTracingP return retval; } - inline std::span getShaders() const { return shaders; } + inline std::span getShaders() const { return shaders; } }; diff --git a/include/nbl/video/IGPUShader.h b/include/nbl/video/IGPUShader.h deleted file mode 100644 index ccf503b009..0000000000 --- a/include/nbl/video/IGPUShader.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_VIDEO_I_GPU_SHADER_H_INCLUDED_ -#define _NBL_VIDEO_I_GPU_SHADER_H_INCLUDED_ - -#include "nbl/core/IReferenceCounted.h" - -#include "nbl/asset/IShader.h" - -#include "nbl/video/decl/IBackendObject.h" -#include "nbl/video/decl/IBackendObject.h" - -namespace nbl::video -{ - -//! 
GPU Version of Unspecialized Shader -/* - @see IReferenceCounted -*/ - -class IGPUShader : public asset::IShader, public IBackendObject -{ - public: - using SSpecInfo = asset::IShader::SSpecInfo; - - protected: - explicit IGPUShader(core::smart_refctd_ptr&& dev, const IShader::E_SHADER_STAGE shaderStage, std::string&& filepathHint) - : IBackendObject(std::move(dev)), IShader(shaderStage, std::move(filepathHint)) {} - - virtual ~IGPUShader() = default; -}; - -} - -#endif diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index f2998d8e8c..49364f3a54 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -3,6 +3,7 @@ #include "nbl/asset/asset.h" #include "nbl/asset/utils/ISPIRVOptimizer.h" +#include "nbl/asset/utils/ISPIRVDebloater.h" #include "nbl/asset/utils/CCompilerSet.h" #include "nbl/video/SPhysicalDeviceFeatures.h" @@ -707,19 +708,16 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe //! 
Shaders - struct SShaderCreationParameters { - const asset::ICPUShader* cpushader; - const asset::ISPIRVOptimizer* optimizer; - asset::IShaderCompiler::CCache* readCache; - asset::IShaderCompiler::CCache* writeCache; + struct SShaderCreationParameters + { + const asset::IShader* source; + const asset::ISPIRVOptimizer* optimizer = nullptr; + asset::IShaderCompiler::CCache* readCache = nullptr; + asset::IShaderCompiler::CCache* writeCache = nullptr; + std::span extraDefines = {}; + hlsl::ShaderStage stage = hlsl::ShaderStage::ESS_ALL_OR_LIBRARY; }; - - core::smart_refctd_ptr compileShader(const SShaderCreationParameters& creationParams); - - // New version below has caching options - [[deprecated]] - core::smart_refctd_ptr createShader(const asset::ICPUShader* cpushader, const asset::ISPIRVOptimizer* optimizer=nullptr); - core::smart_refctd_ptr createShader(const SShaderCreationParameters& creationParams); + core::smart_refctd_ptr compileShader(const SShaderCreationParameters& creationParams); //! 
Layouts // Create a descriptor set layout (@see ICPUDescriptorSetLayout) @@ -757,7 +755,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe NBL_LOG_ERROR("Number of push constants ranges exceeds device limits"); return nullptr; } - core::bitflag stages = IGPUShader::E_SHADER_STAGE::ESS_UNKNOWN; + core::bitflag stages = hlsl::ShaderStage::ESS_UNKNOWN; uint32_t maxPCByte = 0u; for (auto range : pcRanges) { @@ -907,47 +905,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return createPipelineCache(initialData,notThreadsafe); } - inline bool createComputePipelines(IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output) - { - std::fill_n(output,params.size(),nullptr); - IGPUComputePipeline::SCreationParams::SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params,[this](const IGPUShader::SSpecInfo& info)->bool - { - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-08771 - if (!info.shader->wasCreatedBy(this)) - { - NBL_LOG_ERROR("The shader was not created by this device"); - return false; - } - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02755 - if (info.requiredSubgroupSize >= IGPUShader::SSpecInfo::SUBGROUP_SIZE::REQUIRE_4 && !getPhysicalDeviceLimits().requiredSubgroupSizeStages.hasFlags(info.shader->getStage())) - { - NBL_LOG_ERROR("Invalid shader stage"); - return false; - } - return true; - }); - if (!specConstantValidation) - { - NBL_LOG_ERROR("Invalid parameters were given"); - return false; - } - - createComputePipelines_impl(pipelineCache,params,output,specConstantValidation); - - bool retval = true; - for (auto i=0u; igetObjectDebugName(); - if (!output[i]) - { - NBL_LOG_ERROR("ComputeShader was not 
created (params[%u])" , i); - retval = false; - } - else if (debugName && debugName[0]) - output[i]->setObjectDebugName(debugName); - } - return retval; - } + bool createComputePipelines(IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output); bool createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, @@ -1101,8 +1059,6 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe virtual DEFERRABLE_RESULT copyAccelerationStructureToMemory_impl(IDeferredOperation* const deferredOperation, const IGPUAccelerationStructure::HostCopyToMemoryInfo& copyInfo) = 0; virtual DEFERRABLE_RESULT copyAccelerationStructureFromMemory_impl(IDeferredOperation* const deferredOperation, const IGPUAccelerationStructure::HostCopyFromMemoryInfo& copyInfo) = 0; - virtual core::smart_refctd_ptr createShader_impl(const asset::ICPUShader* spirvShader) = 0; - constexpr static inline auto MaxStagesPerPipeline = 6u; virtual core::smart_refctd_ptr createDescriptorSetLayout_impl(const std::span bindings, const uint32_t maxSamplersCount) = 0; virtual core::smart_refctd_ptr createPipelineLayout_impl( @@ -1193,12 +1149,70 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return {}; } + const auto& features = getEnabledFeatures(); for (auto info : ci.getShaders()) - if (info.shader && !extra(info)) + if (info.shader) + { + const asset::IShader::E_SHADER_STAGE shaderStage = info.stage; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00704 + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00705 + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-02091 + // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-02092 + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00706 + switch (shaderStage) + { + case hlsl::ShaderStage::ESS_TESSELLATION_CONTROL: [[fallthrough]]; + case hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION: + if (!features.tessellationShader) + { + NBL_LOG_ERROR("Cannot create IGPUShader for %p, Tessellation Shader feature not enabled!", info.shader); + return {}; + } + break; + case hlsl::ShaderStage::ESS_GEOMETRY: + if (!features.geometryShader) + { + NBL_LOG_ERROR("Cannot create IGPUShader for %p, Geometry Shader feature not enabled!", info.shader); + return {}; + } + break; + case hlsl::ShaderStage::ESS_ALL_OR_LIBRARY: [[fallthrough]]; + case hlsl::ShaderStage::ESS_VERTEX: [[fallthrough]]; + case hlsl::ShaderStage::ESS_FRAGMENT: [[fallthrough]]; + case hlsl::ShaderStage::ESS_COMPUTE: + break; + // unsupported yet + case hlsl::ShaderStage::ESS_TASK: [[fallthrough]]; + case hlsl::ShaderStage::ESS_MESH: + NBL_LOG_ERROR("Unsupported (yet) shader stage"); + return {}; + break; + case hlsl::ShaderStage::ESS_RAYGEN: [[fallthrough]]; + case hlsl::ShaderStage::ESS_ANY_HIT: [[fallthrough]]; + case hlsl::ShaderStage::ESS_CLOSEST_HIT: [[fallthrough]]; + case hlsl::ShaderStage::ESS_MISS: [[fallthrough]]; + case hlsl::ShaderStage::ESS_INTERSECTION: [[fallthrough]]; + case hlsl::ShaderStage::ESS_CALLABLE: + if (!features.rayTracingPipeline) + { + NBL_LOG_ERROR("Cannot create IGPUShader for %p, Raytracing Pipeline feature not enabled!", info.shader); + return {}; + } + break; + default: + // Implicit unsupported stages or weird multi-bit stage enum values + NBL_LOG_ERROR("Unknown Shader Stage %d", shaderStage); + return {}; + break; + } + + if (!extra(info)) { NBL_LOG_ERROR("Invalid shader were specified (params[%d])", i); 
return {}; } + } retval += validation; } @@ -1248,6 +1262,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe uint16_t firstQueueIndex = 0u; }; const std::array m_queueFamilyInfos; + core::smart_refctd_ptr m_spirvDebloater; private: const SPhysicalDeviceLimits& getPhysicalDeviceLimits() const; diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index 77bab76f64..ee7d068ef3 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -4,8 +4,7 @@ #ifndef _NBL_VIDEO_ASSET_TRAITS_H_INCLUDED_ #define _NBL_VIDEO_ASSET_TRAITS_H_INCLUDED_ -#include "nbl/asset/ICPUShader.h" -#include "nbl/video/IGPUShader.h" +#include "nbl/asset/IShader.h" #include "nbl/asset/ICPUBufferView.h" #include "nbl/video/IGPUBufferView.h" #include "nbl/asset/ICPUDescriptorSet.h" @@ -42,14 +41,14 @@ struct asset_traits }; template<> -struct asset_traits +struct asset_traits { // the asset type - using asset_t = asset::ICPUShader; + using asset_t = asset::IShader; // we don't need to descend during DFS into other assets constexpr static inline bool HasChildren = false; // the video type - using video_t = IGPUShader; + using video_t = asset::IShader; // lookup type using lookup_t = const video_t*; }; diff --git a/include/nbl/video/utilities/CAssetConverter.h b/include/nbl/video/utilities/CAssetConverter.h index 02d43cff69..db61ee7857 100644 --- a/include/nbl/video/utilities/CAssetConverter.h +++ b/include/nbl/video/utilities/CAssetConverter.h @@ -37,7 +37,7 @@ class CAssetConverter : public core::IReferenceCounted // Descriptor Set -> unique layout, using supported_asset_types = core::type_list< asset::ICPUSampler, - asset::ICPUShader, + asset::IShader, asset::ICPUBuffer, asset::ICPUBottomLevelAccelerationStructure, asset::ICPUTopLevelAccelerationStructure, @@ -128,22 +128,6 @@ class CAssetConverter : public core::IReferenceCounted } }; template<> - struct NBL_API2 patch_impl_t - { - public: - 
PATCH_IMPL_BOILERPLATE(asset::ICPUShader); - - using shader_stage_t = asset::IShader::E_SHADER_STAGE; - shader_stage_t stage = shader_stage_t::ESS_UNKNOWN; - - protected: - inline std::pair combine(const this_t& other) const - { - // because of the assumption that we'll only be combining valid patches, we can't have the stages differ - return {stage==other.stage,*this}; - } - }; - template<> struct NBL_API2 patch_impl_t { public: @@ -414,7 +398,7 @@ class CAssetConverter : public core::IReferenceCounted public: PATCH_IMPL_BOILERPLATE(asset::ICPUPipelineLayout); - using shader_stage_t = asset::IShader::E_SHADER_STAGE; + using shader_stage_t = hlsl::ShaderStage; std::array,asset::CSPIRVIntrospector::MaxPushConstantsSize> pushConstantBytes = {shader_stage_t::ESS_UNKNOWN}; protected: @@ -546,7 +530,6 @@ class CAssetConverter : public core::IReferenceCounted { public: virtual const patch_t* operator()(const lookup_t&) const = 0; - virtual const patch_t* operator()(const lookup_t&) const = 0; virtual const patch_t* operator()(const lookup_t&) const = 0; virtual const patch_t* operator()(const lookup_t&) const = 0; virtual const patch_t* operator()(const lookup_t&) const = 0; @@ -556,6 +539,10 @@ class CAssetConverter : public core::IReferenceCounted virtual const patch_t* operator()(const lookup_t&) const = 0; // certain items are not patchable, so there's no `patch_t` with non zero size + inline const patch_t* operator()(const lookup_t& unpatchable) const + { + return unpatchable.patch; + } inline const patch_t* operator()(const lookup_t& unpatchable) const { return unpatchable.patch; @@ -667,7 +654,7 @@ class CAssetConverter : public core::IReferenceCounted struct NBL_API2 hash_impl : hash_impl_base { bool operator()(lookup_t); - bool operator()(lookup_t); + bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); @@ -1087,6 +1074,9 @@ class CAssetConverter : public core::IReferenceCounted // we don't insert into the 
writeCache until conversions are successful core::tuple_transform_t m_stagingCaches; + // converted IShaders do not have any object that hold a smartptr into them, so we have to persist them in this vector to prevent m_stagingCacheds hold a raw dangling pointer into them + core::vector> m_shaders; + // need a more explicit list of GPU objects that need device-assisted conversion template struct SConversionRequestBase diff --git a/include/nbl/video/utilities/CComputeBlit.h b/include/nbl/video/utilities/CComputeBlit.h index a06f8914c4..9a02915187 100644 --- a/include/nbl/video/utilities/CComputeBlit.h +++ b/include/nbl/video/utilities/CComputeBlit.h @@ -12,7 +12,7 @@ class CComputeBlit : public core::IReferenceCounted { public: constexpr static inline asset::SPushConstantRange DefaultPushConstantRange = { - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = hlsl::ShaderStage::ESS_COMPUTE, .offset = 0ull, .size = sizeof(hlsl::blit::Parameters) }; diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index ad464bd035..8f0edb1056 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -162,6 +162,7 @@ set(NBL_ASSET_SOURCES # Shaders ${NBL_ROOT_PATH}/src/nbl/asset/utils/ISPIRVOptimizer.cpp + ${NBL_ROOT_PATH}/src/nbl/asset/utils/ISPIRVDebloater.cpp ${NBL_ROOT_PATH}/src/nbl/asset/utils/IShaderCompiler.cpp ${NBL_ROOT_PATH}/src/nbl/asset/utils/CGLSLCompiler.cpp ${NBL_ROOT_PATH}/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -256,7 +257,6 @@ set(NBL_VIDEO_SOURCES ${NBL_ROOT_PATH}/src/nbl/video/CVulkanRenderpass.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanImageView.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanCommandBuffer.cpp - ${NBL_ROOT_PATH}/src/nbl/video/CVulkanShader.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanCommandPool.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanBuffer.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanDescriptorSetLayout.cpp diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index a6ec07a010..26b029585b 
100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -276,7 +276,7 @@ void IAssetManager::insertBuiltinAssets() asset::ICPUDescriptorSetLayout::SBinding binding1; binding1.count = 1u; binding1.binding = 0u; - binding1.stageFlags = static_cast(asset::ICPUShader::E_SHADER_STAGE::ESS_VERTEX | asset::ICPUShader::E_SHADER_STAGE::ESS_FRAGMENT); + binding1.stageFlags = static_cast(hlsl::ShaderStage::ESS_VERTEX | hlsl::ShaderStage::ESS_FRAGMENT); binding1.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; auto ds1Layout = core::make_smart_refctd_ptr(&binding1, &binding1 + 1); @@ -290,7 +290,7 @@ void IAssetManager::insertBuiltinAssets() binding3.binding = 0u; binding3.type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; binding3.count = 1u; - binding3.stageFlags = static_cast(asset::ICPUShader::E_SHADER_STAGE::ESS_FRAGMENT); + binding3.stageFlags = static_cast(hlsl::ShaderStage::ESS_FRAGMENT); binding3.immutableSamplers = nullptr; auto ds3Layout = core::make_smart_refctd_ptr(&binding3, &binding3 + 1); @@ -392,7 +392,7 @@ void IAssetManager::insertBuiltinAssets() bnd.count = 1u; bnd.binding = 0u; //maybe even ESS_ALL_GRAPHICS? 
- bnd.stageFlags = static_cast(asset::ICPUShader::E_SHADER_STAGE::ESS_VERTEX | asset::ICPUShader::E_SHADER_STAGE::ESS_FRAGMENT); + bnd.stageFlags = static_cast(hlsl::ShaderStage::ESS_VERTEX | hlsl::ShaderStage::ESS_FRAGMENT); bnd.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; defaultDs1Layout = core::make_smart_refctd_ptr(&bnd, &bnd+1); //it's intentionally added to cache later, see comments below, dont touch this order of insertions @@ -422,7 +422,7 @@ void IAssetManager::insertBuiltinAssets() asset::ICPUDescriptorSetLayout::SBinding bnd; bnd.count = 1u; bnd.binding = 0u; - bnd.stageFlags = static_cast(asset::ICPUShader::E_SHADER_STAGE::ESS_VERTEX | asset::ICPUShader::E_SHADER_STAGE::ESS_FRAGMENT); + bnd.stageFlags = static_cast(hlsl::ShaderStage::ESS_VERTEX | hlsl::ShaderStage::ESS_FRAGMENT); bnd.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; auto ds1Layout = core::make_smart_refctd_ptr(&bnd, &bnd + 1); diff --git a/src/nbl/asset/interchange/CGLSLLoader.cpp b/src/nbl/asset/interchange/CGLSLLoader.cpp index b7f61ae85c..ceb1086aa5 100644 --- a/src/nbl/asset/interchange/CGLSLLoader.cpp +++ b/src/nbl/asset/interchange/CGLSLLoader.cpp @@ -3,7 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/asset.h" -#include "CGLSLLoader.h" +#include "nbl/asset/interchange/CGLSLLoader.h" using namespace nbl; using namespace nbl::asset; @@ -21,36 +21,14 @@ SAssetBundle CGLSLLoader::loadAsset(system::IFile* _file, const IAssetLoader::SA _file->read(success, source, 0, len); if (!success) return {}; - reinterpret_cast(source)[len] = 0; - const auto filename = _file->getFileName(); - //! 
TODO: Actually invoke the GLSL compiler to decode our type from any `#pragma`s - std::filesystem::path extension = filename.extension(); - - - core::unordered_map typeFromExt = { - {".vert",IShader::E_SHADER_STAGE::ESS_VERTEX}, - {".tesc",IShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL}, - {".tese",IShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION}, - {".geom",IShader::E_SHADER_STAGE::ESS_GEOMETRY}, - {".frag",IShader::E_SHADER_STAGE::ESS_FRAGMENT}, - {".comp",IShader::E_SHADER_STAGE::ESS_COMPUTE} - }; - auto found = typeFromExt.find(extension.string()); - if (found == typeFromExt.end()) - { - _NBL_ALIGNED_FREE(source); - return {}; - } - - auto shader = core::make_smart_refctd_ptr(reinterpret_cast(source), found->second, IShader::E_CONTENT_TYPE::ECT_GLSL, filename.string()); + auto shader = core::make_smart_refctd_ptr(reinterpret_cast(source), IShader::E_CONTENT_TYPE::ECT_GLSL, filename.string()); { auto backingBuffer = shader->getContent(); const_cast(backingBuffer)->setContentHash(backingBuffer->computeContentHash()); } _NBL_ALIGNED_FREE(source); - return SAssetBundle(nullptr,{ std::move(shader) }); } diff --git a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp index 0ad1e72f3b..8d6f575ae5 100644 --- a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp +++ b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp @@ -29,7 +29,7 @@ CGraphicsPipelineLoaderMTL::CGraphicsPipelineLoaderMTL(IAssetManager* _am, core: { #if 0 // Remove IRenderpassIndependentPipelines and use MC for Mesh Loaders //create vertex shaders and insert them into cache - auto registerShader = [&](ICPUShader::E_SHADER_STAGE stage) -> void + auto registerShader = [&](hlsl::ShaderStage stage) -> void { auto fileSystem = m_assetMgr->getSystem(); @@ -85,7 +85,7 @@ void CGraphicsPipelineLoaderMTL::initialize() // precompute the no UV pipeline layout { SPushConstantRange pcRng; - pcRng.stageFlags = 
ICPUShader::E_SHADER_STAGE::ESS_FRAGMENT; + pcRng.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT; pcRng.offset = 0u; pcRng.size = sizeof(SMtl::params); //if intellisense shows error here, it's most likely intellisense's fault and it'll build fine anyway diff --git a/src/nbl/asset/interchange/CHLSLLoader.cpp b/src/nbl/asset/interchange/CHLSLLoader.cpp index e049f3bdab..fece93d7b6 100644 --- a/src/nbl/asset/interchange/CHLSLLoader.cpp +++ b/src/nbl/asset/interchange/CHLSLLoader.cpp @@ -3,7 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/asset.h" -#include "CHLSLLoader.h" +#include "nbl/asset/interchange/CHLSLLoader.h" +#include "nbl/asset/metadata/CHLSLMetadata.h" using namespace nbl; using namespace nbl::asset; @@ -24,8 +25,7 @@ SAssetBundle CHLSLLoader::loadAsset(system::IFile* _file, const IAssetLoader::SA // make sure put string end terminator reinterpret_cast(source->getPointer())[len] = 0; - - const auto filename = _file->getFileName(); + const auto filename = _file->getFileName(); auto filenameEnding = filename.filename().string(); core::unordered_map typeFromExt = @@ -58,5 +58,8 @@ SAssetBundle CHLSLLoader::loadAsset(system::IFile* _file, const IAssetLoader::SA } source->setContentHash(source->computeContentHash()); - return SAssetBundle(nullptr,{core::make_smart_refctd_ptr(std::move(source), shaderStage, IShader::E_CONTENT_TYPE::ECT_HLSL, filename.string())}); + + auto shaderStages = core::make_refctd_dynamic_array>(1u); + shaderStages->front() = shaderStage; + return SAssetBundle(core::make_smart_refctd_ptr(std::move(shaderStages)), {core::make_smart_refctd_ptr(std::move(source), IShader::E_CONTENT_TYPE::ECT_HLSL, filename.string())}); } diff --git a/src/nbl/asset/interchange/CSPVLoader.cpp b/src/nbl/asset/interchange/CSPVLoader.cpp index 6787912cde..7d44085742 100644 --- a/src/nbl/asset/interchange/CSPVLoader.cpp +++ b/src/nbl/asset/interchange/CSPVLoader.cpp @@ -1,64 +1,14 @@ // Copyright (C) 2018-2020 
- DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/core/declarations.h" -#include "nbl/asset/ICPUShader.h" -#include "nbl_spirv_cross/spirv.hpp" -#include "nbl_spirv_cross/spirv_parser.hpp" - -#include "CSPVLoader.h" -#include "nbl_spirv_cross/spirv.hpp" -#include "nbl_spirv_cross/spirv_cfg.hpp" -#include "nbl_spirv_cross/spirv_parser.hpp" +#include "nbl/asset/IShader.h" +#include "nbl/asset/interchange/CSPVLoader.h" using namespace nbl; using namespace nbl::asset; -inline IShader::E_SHADER_STAGE getShaderStageFromSPIRVCrossExecutionModel(spv::ExecutionModel model) -{ - IShader::E_SHADER_STAGE shaderStage; - switch (model) - { - case spv::ExecutionModelVertex: - shaderStage = IShader::E_SHADER_STAGE::ESS_VERTEX; break; - case spv::ExecutionModelTessellationControl: - shaderStage = IShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL; break; - case spv::ExecutionModelTessellationEvaluation: - shaderStage = IShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION; break; - case spv::ExecutionModelGeometry: - shaderStage = IShader::E_SHADER_STAGE::ESS_GEOMETRY; break; - case spv::ExecutionModelFragment: - shaderStage = IShader::E_SHADER_STAGE::ESS_FRAGMENT; break; - case spv::ExecutionModelGLCompute: - shaderStage = IShader::E_SHADER_STAGE::ESS_COMPUTE; break; - case spv::ExecutionModelTaskNV: - shaderStage = IShader::E_SHADER_STAGE::ESS_TASK; break; - case spv::ExecutionModelMeshNV: - shaderStage = IShader::E_SHADER_STAGE::ESS_MESH; break; - case spv::ExecutionModelRayGenerationKHR: - shaderStage = IShader::E_SHADER_STAGE::ESS_RAYGEN; break; - case spv::ExecutionModelIntersectionKHR: - shaderStage = IShader::E_SHADER_STAGE::ESS_INTERSECTION; break; - case spv::ExecutionModelAnyHitKHR: - shaderStage = IShader::E_SHADER_STAGE::ESS_ANY_HIT; break; - case spv::ExecutionModelClosestHitKHR: - shaderStage = IShader::E_SHADER_STAGE::ESS_MISS; break; - case 
spv::ExecutionModelMissKHR: - shaderStage = IShader::E_SHADER_STAGE::ESS_MISS; break; - case spv::ExecutionModelCallableKHR: - shaderStage = IShader::E_SHADER_STAGE::ESS_CALLABLE; break; - case spv::ExecutionModelKernel: - case spv::ExecutionModelMax: - default: - assert(!"Shader stage not supported!"); - shaderStage = IShader::E_SHADER_STAGE::ESS_UNKNOWN; - break; - } - return shaderStage; -} - // load in the image data SAssetBundle CSPVLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { @@ -75,11 +25,6 @@ SAssetBundle CSPVLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAs if (reinterpret_cast(buffer->getPointer())[0]!=SPV_MAGIC_NUMBER) return {}; - SPIRV_CROSS_NAMESPACE::Parser parser(reinterpret_cast(buffer->getPointer()), buffer->getSize() / 4ull); - parser.parse(); - const SPIRV_CROSS_NAMESPACE::ParsedIR& parsedIR = parser.get_parsed_ir(); - SPIRV_CROSS_NAMESPACE::SPIREntryPoint defaultEntryPoint = parsedIR.entry_points.at(parsedIR.default_entry_point); - buffer->setContentHash(buffer->computeContentHash()); - return SAssetBundle(nullptr,{core::make_smart_refctd_ptr(std::move(buffer), getShaderStageFromSPIRVCrossExecutionModel(defaultEntryPoint.model), asset::IShader::E_CONTENT_TYPE::ECT_SPIRV, _file->getFileName().string())}); + return SAssetBundle(nullptr,{core::make_smart_refctd_ptr(std::move(buffer),asset::IShader::E_CONTENT_TYPE::ECT_SPIRV,_file->getFileName().string())}); } diff --git a/src/nbl/asset/interchange/CSPVLoader.h b/src/nbl/asset/interchange/CSPVLoader.h index fb586a1e6c..cd643478e7 100644 --- a/src/nbl/asset/interchange/CSPVLoader.h +++ b/src/nbl/asset/interchange/CSPVLoader.h @@ -1,7 +1,6 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - #ifndef _NBL_ASSET_C_SPIR_V_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_SPIR_V_LOADER_H_INCLUDED_ @@ -12,7 +11,8 @@ namespace nbl::asset class CSPVLoader final : public asset::IAssetLoader { - _NBL_STATIC_INLINE_CONSTEXPR uint32_t SPV_MAGIC_NUMBER = 0x07230203u; + constexpr static inline uint32_t SPV_MAGIC_NUMBER = 0x07230203u; + public: CSPVLoader() = default; inline bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const override diff --git a/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp b/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp index ed8db65a26..7c09ef88cc 100644 --- a/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp +++ b/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp @@ -52,7 +52,7 @@ void IRenderpassIndependentPipelineLoader::initialize() semantic.descriptorSection.uniformBufferObject.set = 1u; semantic.descriptorSection.uniformBufferObject.relByteoffset = relOffsets[i]; semantic.descriptorSection.uniformBufferObject.bytesize = sizes[i]; - semantic.descriptorSection.shaderAccessFlags = ICPUShader::E_SHADER_STAGE::ESS_VERTEX; + semantic.descriptorSection.shaderAccessFlags = hlsl::ShaderStage::ESS_VERTEX; } } } diff --git a/src/nbl/asset/utils/CCompilerSet.cpp b/src/nbl/asset/utils/CCompilerSet.cpp index eb293baca5..e1b161f4e3 100644 --- a/src/nbl/asset/utils/CCompilerSet.cpp +++ b/src/nbl/asset/utils/CCompilerSet.cpp @@ -6,65 +6,63 @@ using namespace nbl; using namespace nbl::asset; -core::smart_refctd_ptr CCompilerSet::compileToSPIRV(const ICPUShader* shader, const IShaderCompiler::SCompilerOptions& options) const +core::smart_refctd_ptr CCompilerSet::compileToSPIRV(const IShader* shader, const IShaderCompiler::SCompilerOptions& options) const { - core::smart_refctd_ptr outSpirvShader = nullptr; + core::smart_refctd_ptr outSpirvShader = nullptr; if (shader) { switch 
(shader->getContentType()) { - case IShader::E_CONTENT_TYPE::ECT_HLSL: - { + case IShader::E_CONTENT_TYPE::ECT_HLSL: + { #ifdef _NBL_PLATFORM_WINDOWS_ - const char* code = reinterpret_cast(shader->getContent()->getPointer()); - outSpirvShader = m_HLSLCompiler->compileToSPIRV(code, options); + const char* code = reinterpret_cast(shader->getContent()->getPointer()); + outSpirvShader = m_HLSLCompiler->compileToSPIRV(code, options); #endif - } - break; - case IShader::E_CONTENT_TYPE::ECT_GLSL: - { - const char* code = reinterpret_cast(shader->getContent()->getPointer()); - outSpirvShader = m_GLSLCompiler->compileToSPIRV(code, options); - } - break; - case IShader::E_CONTENT_TYPE::ECT_SPIRV: - { - outSpirvShader = core::smart_refctd_ptr(const_cast(shader)); - } - break; + } + break; + case IShader::E_CONTENT_TYPE::ECT_GLSL: + { + const char* code = reinterpret_cast(shader->getContent()->getPointer()); + outSpirvShader = m_GLSLCompiler->compileToSPIRV(code, options); + } + break; + case IShader::E_CONTENT_TYPE::ECT_SPIRV: + { + outSpirvShader = core::smart_refctd_ptr(const_cast(shader)); + } + break; } } return outSpirvShader; } -core::smart_refctd_ptr CCompilerSet::preprocessShader(const ICPUShader* shader, const IShaderCompiler::SPreprocessorOptions& preprocessOptions) const +core::smart_refctd_ptr CCompilerSet::preprocessShader(const IShader* shader, hlsl::ShaderStage& stage, const IShaderCompiler::SPreprocessorOptions& preprocessOptions) const { if (shader) { switch (shader->getContentType()) { - case IShader::E_CONTENT_TYPE::ECT_HLSL: - { + case IShader::E_CONTENT_TYPE::ECT_HLSL: + { #ifdef _NBL_PLATFORM_WINDOWS_ - const char* code = reinterpret_cast(shader->getContent()->getPointer()); - auto stage = shader->getStage(); - auto resolvedCode = m_HLSLCompiler->preprocessShader(code, stage, preprocessOptions); - return core::make_smart_refctd_ptr(resolvedCode.c_str(), stage, IShader::E_CONTENT_TYPE::ECT_HLSL, std::string(shader->getFilepathHint())); + const char* code 
= reinterpret_cast(shader->getContent()->getPointer()); + auto resolvedCode = m_HLSLCompiler->preprocessShader(code, stage, preprocessOptions); + return core::make_smart_refctd_ptr(resolvedCode.c_str(), IShader::E_CONTENT_TYPE::ECT_HLSL, std::string(shader->getFilepathHint())); #endif - } - break; - case IShader::E_CONTENT_TYPE::ECT_GLSL: - { - const char* code = reinterpret_cast(shader->getContent()->getPointer()); - auto stage = shader->getStage(); - auto resolvedCode = m_GLSLCompiler->preprocessShader(code, stage, preprocessOptions); - return core::make_smart_refctd_ptr(resolvedCode.c_str(), stage, IShader::E_CONTENT_TYPE::ECT_GLSL, std::string(shader->getFilepathHint())); - } - break; - case IShader::E_CONTENT_TYPE::ECT_SPIRV: - return core::smart_refctd_ptr(const_cast(shader)); - default: - break; + } + break; + case IShader::E_CONTENT_TYPE::ECT_GLSL: + { + const char* code = reinterpret_cast(shader->getContent()->getPointer()); + auto resolvedCode = m_GLSLCompiler->preprocessShader(code, stage, preprocessOptions); + return core::make_smart_refctd_ptr(resolvedCode.c_str(), IShader::E_CONTENT_TYPE::ECT_GLSL, std::string(shader->getFilepathHint())); + } + break; + case IShader::E_CONTENT_TYPE::ECT_SPIRV: + return core::smart_refctd_ptr(const_cast(shader)); + default: + break; } } return nullptr; diff --git a/src/nbl/asset/utils/CGLSLCompiler.cpp b/src/nbl/asset/utils/CGLSLCompiler.cpp index c2783fafa1..ca6ee329a8 100644 --- a/src/nbl/asset/utils/CGLSLCompiler.cpp +++ b/src/nbl/asset/utils/CGLSLCompiler.cpp @@ -252,7 +252,7 @@ uint32_t CGLSLCompiler::encloseWithinExtraInclGuardsLeadingLines(uint32_t _maxIn return std::count(substr.begin(), substr.end(), '\n'); } -core::smart_refctd_ptr CGLSLCompiler::compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const +core::smart_refctd_ptr CGLSLCompiler::compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& 
options, std::vector* dependencies) const { // The dependencies are only sent if a Cache was requested. Since caching is not supported for GLSL, we crash the program assert(!dependencies); @@ -282,7 +282,7 @@ core::smart_refctd_ptr CGLSLCompiler::compileToSPIRV_impl(const std: if (glslOptions.spirvOptimizer) outSpirv = glslOptions.spirvOptimizer->optimize(outSpirv.get(), glslOptions.preprocessorOptions.logger); - return core::make_smart_refctd_ptr(std::move(outSpirv), glslOptions.stage, IShader::E_CONTENT_TYPE::ECT_SPIRV, glslOptions.preprocessorOptions.sourceIdentifier.data()); + return core::make_smart_refctd_ptr(std::move(outSpirv), IShader::E_CONTENT_TYPE::ECT_SPIRV, glslOptions.preprocessorOptions.sourceIdentifier.data()); } else { diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 4905e21a9e..f99d81d01f 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -426,7 +426,7 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE return preprocessShader(std::move(code), stage, preprocessOptions, extra_dxc_compile_flags); } -core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const +core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const { auto hlslOptions = option_cast(options); auto logger = hlslOptions.preprocessorOptions.logger; @@ -472,9 +472,10 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std: return nullptr; break; } - // TODO: add entry point to `CHLSLCompiler::SOptions` and handle it properly in `dxc_compile_flags.empty()` - if (stage != asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY) { + if (stage != asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY) + { arguments.push_back(L"-E"); + // 
TODO: add entry point to `CHLSLCompiler::SOptions` and handle it properly in `dxc_compile_flags.empty()` arguments.push_back(L"main"); } // If a custom SPIR-V optimizer is specified, use that instead of DXC's spirv-opt. @@ -544,7 +545,7 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std: if (hlslOptions.spirvOptimizer) outSpirv = hlslOptions.spirvOptimizer->optimize(outSpirv.get(), logger); - return core::make_smart_refctd_ptr(std::move(outSpirv), stage, IShader::E_CONTENT_TYPE::ECT_SPIRV, hlslOptions.preprocessorOptions.sourceIdentifier.data()); + return core::make_smart_refctd_ptr(std::move(outSpirv), IShader::E_CONTENT_TYPE::ECT_SPIRV, hlslOptions.preprocessorOptions.sourceIdentifier.data()); } diff --git a/src/nbl/asset/utils/CSPIRVIntrospector.cpp b/src/nbl/asset/utils/CSPIRVIntrospector.cpp index 8d0f05bf8e..8b43c676b7 100644 --- a/src/nbl/asset/utils/CSPIRVIntrospector.cpp +++ b/src/nbl/asset/utils/CSPIRVIntrospector.cpp @@ -106,14 +106,15 @@ static CSPIRVIntrospector::CStageIntrospectionData::VAR_TYPE spvcrossType2E_TYPE } } -core::smart_refctd_ptr CSPIRVIntrospector::createApproximateComputePipelineFromIntrospection(const ICPUShader::SSpecInfo& info, core::smart_refctd_ptr&& layout/* = nullptr*/) +core::smart_refctd_ptr CSPIRVIntrospector::createApproximateComputePipelineFromIntrospection(const IPipelineBase::SShaderSpecInfo& info, core::smart_refctd_ptr&& layout/* = nullptr*/) { - if (info.shader->getStage() != IShader::E_SHADER_STAGE::ESS_COMPUTE || info.valid() == ICPUShader::SSpecInfo::INVALID_SPEC_INFO) + if (info.stage!=IShader::E_SHADER_STAGE::ESS_COMPUTE || info.valid()==IPipelineBase::SShaderSpecInfo::INVALID_SPEC_INFO) return nullptr; CStageIntrospectionData::SParams params; params.entryPoint = info.entryPoint; - params.shader = core::smart_refctd_ptr(info.shader); + params.shader = core::smart_refctd_ptr(info.shader); + params.stage = info.stage; auto introspection = introspect(params); @@ -173,14 +174,15 @@ 
core::smart_refctd_ptr CSPIRVIntrospector::createApproximat layout = pplnIntrospectData->createApproximatePipelineLayoutFromIntrospection(introspection); } - ICPUComputePipeline::SCreationParams pplnCreationParams = { {.layout = layout.get()} }; + ICPUComputePipeline::SCreationParams pplnCreationParams; + pplnCreationParams.layout = layout.get(); pplnCreationParams.shader = info; pplnCreationParams.layout = layout.get(); return ICPUComputePipeline::create(pplnCreationParams); } // returns true if successfully added all the info to self, false if incompatible with what's already in our pipeline or incomplete (e.g. missing spec constants) -NBL_API2 bool CSPIRVIntrospector::CPipelineIntrospectionData::merge(const CSPIRVIntrospector::CStageIntrospectionData* stageData, const ICPUShader::SSpecInfoBase::spec_constant_map_t* specConstants) +NBL_API2 bool CSPIRVIntrospector::CPipelineIntrospectionData::merge(const CSPIRVIntrospector::CStageIntrospectionData* stageData, const IPipelineBase::SShaderSpecInfo::spec_constant_map_t* specConstants) { if (!stageData) return false; @@ -278,13 +280,13 @@ NBL_API2 bool CSPIRVIntrospector::CPipelineIntrospectionData::merge(const CSPIRV auto a = pc.size; if (pc.present()) { - std::span> pcRangesSpan = { + std::span> pcRangesSpan = { m_pushConstantBytes.data() + pc.offset, pc.size }; // iterate over all bytes used - const IShader::E_SHADER_STAGE shaderStage = stageData->getParams().shader->getStage(); + const IShader::E_SHADER_STAGE shaderStage = stageData->getParams().stage; for (auto it = pcRangesSpan.begin(); it != pcRangesSpan.end(); ++it) *it |= shaderStage; } @@ -616,7 +618,7 @@ NBL_API2 core::smart_refctd_ptrgetContent(); spirv_cross::Compiler comp(reinterpret_cast(spv->getPointer()), spv->getSize() / 4u); - const IShader::E_SHADER_STAGE shaderStage = params.shader->getStage(); + const IShader::E_SHADER_STAGE shaderStage = params.stage; spv::ExecutionModel stage = ESS2spvExecModel(shaderStage); if (stage == 
spv::ExecutionModelMax) diff --git a/src/nbl/asset/utils/ISPIRVDebloater.cpp b/src/nbl/asset/utils/ISPIRVDebloater.cpp new file mode 100644 index 0000000000..f05e9d70f5 --- /dev/null +++ b/src/nbl/asset/utils/ISPIRVDebloater.cpp @@ -0,0 +1,259 @@ +#include "nbl/asset/utils/ISPIRVDebloater.h" +#include "nbl/asset/utils/ISPIRVOptimizer.h" +#include "nbl_spirv_cross/spirv.hpp" + +#include "nbl/core/declarations.h" +#include "nbl/system/ILogger.h" +#include "spirv-tools/libspirv.hpp" + +using namespace nbl::asset; + +static constexpr spv_target_env SPIRV_VERSION = spv_target_env::SPV_ENV_UNIVERSAL_1_6; + +ISPIRVDebloater::ISPIRVDebloater() +{ + constexpr auto optimizationPasses = std::array{ + ISPIRVOptimizer::EOP_DEAD_BRANCH_ELIM, + ISPIRVOptimizer::EOP_ELIM_DEAD_FUNCTIONS, + ISPIRVOptimizer::EOP_DEAD_BRANCH_ELIM, + ISPIRVOptimizer::EOP_ELIM_DEAD_FUNCTIONS, + ISPIRVOptimizer::EOP_ELIM_DEAD_VARIABLES, + ISPIRVOptimizer::EOP_ELIM_DEAD_CONSTANTS, + ISPIRVOptimizer::EOP_ELIM_DEAD_MEMBERS, + ISPIRVOptimizer::EOP_TRIM_CAPABILITIES, + }; + m_optimizer = core::make_smart_refctd_ptr(std::span(optimizationPasses)); +} + +// This is for debugging temporarily. 
will be reworked after finish testing +static void printCapabilities(const uint32_t* spirv, uint32_t spirvDwordCount,nbl::system::logger_opt_ptr logger) +{ + spvtools::SpirvTools core(SPIRV_VERSION); + std::string disassembly; + core.Disassemble(spirv, spirvDwordCount, &disassembly, SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + std::stringstream ss(disassembly); + std::string to; + const auto stringsToFind = std::array{ "OpCapability", "= OpFunction","OpFunctionEnd", "OpSpecConstant", "=OpType"}; + while(std::getline(ss, to, '\n')){ + if (to.size() > 1 && to.back() == ',') continue; + for (const auto& stringToFind: stringsToFind) + { + if (to.find(stringToFind) != std::string::npos) + { + logger.log("%s", nbl::system::ILogger::ELL_DEBUG, to.c_str()); + } + } + } +} + +static bool validate(const uint32_t* binary, uint32_t binarySize, nbl::system::logger_opt_ptr logger) +{ + auto msgConsumer = [&logger](spv_message_level_t level, const char* src, const spv_position_t& pos, const char* msg) + { + using namespace std::string_literals; + + + constexpr static nbl::system::ILogger::E_LOG_LEVEL lvl2lvl[6]{ + nbl::system::ILogger::ELL_ERROR, + nbl::system::ILogger::ELL_ERROR, + nbl::system::ILogger::ELL_ERROR, + nbl::system::ILogger::ELL_WARNING, + nbl::system::ILogger::ELL_INFO, + nbl::system::ILogger::ELL_DEBUG + }; + const auto lvl = lvl2lvl[level]; + std::string location; + if (src) + location = src + ":"s + std::to_string(pos.line) + ":" + std::to_string(pos.column); + else + location = ""; + + logger.log(location, lvl, msg); + }; + spvtools::SpirvTools core(SPIRV_VERSION); + core.SetMessageConsumer(msgConsumer); + spvtools::ValidatorOptions validatorOptions; + // Nabla use Scalar block layout, we skip this validation to work around this and to save time + validatorOptions.SetSkipBlockLayout(true); + return core.Validate(binary, binarySize, validatorOptions); +} + +ISPIRVDebloater::Result ISPIRVDebloater::debloat(const ICPUBuffer* spirvBuffer, const core::set& entryPoints, 
system::logger_opt_ptr logger) const +{ + const auto* spirv = static_cast(spirvBuffer->getPointer()); + const auto spirvDwordCount = spirvBuffer->getSize() / 4; + + if (entryPoints.empty()) + { + logger.log("Cannot retain zero multiple entry points!", system::ILogger::ELL_ERROR); + return Result{ + nullptr, + false + }; + } + + auto foundEntryPoint = 0; + + const bool isInputSpirvValid = validate(spirv, spirvDwordCount, logger); + if (!isInputSpirvValid) + { + logger.log("SPIR-V is not valid", system::ILogger::ELL_ERROR); + return Result{ + nullptr, + false + }; + } + + auto getHlslShaderStage = [](spv::ExecutionModel executionModel) -> hlsl::ShaderStage + { + switch (executionModel) + { + case spv::ExecutionModelVertex : return hlsl::ESS_VERTEX; + case spv::ExecutionModelTessellationControl : return hlsl::ESS_TESSELLATION_CONTROL; + case spv::ExecutionModelTessellationEvaluation : return hlsl::ESS_TESSELLATION_EVALUATION; + case spv::ExecutionModelGeometry : return hlsl::ESS_GEOMETRY; + case spv::ExecutionModelFragment : return hlsl::ESS_FRAGMENT; + case spv::ExecutionModelGLCompute : return hlsl::ESS_COMPUTE; + case spv::ExecutionModelTaskEXT : return hlsl::ESS_TASK; + case spv::ExecutionModelMeshEXT: return hlsl::ESS_MESH; + case spv::ExecutionModelRayGenerationKHR : return hlsl::ESS_RAYGEN; + case spv::ExecutionModelAnyHitKHR : return hlsl::ESS_ANY_HIT; + case spv::ExecutionModelClosestHitKHR : return hlsl::ESS_CLOSEST_HIT; + case spv::ExecutionModelMissKHR : return hlsl::ESS_MISS; + case spv::ExecutionModelIntersectionKHR : return hlsl::ESS_INTERSECTION; + case spv::ExecutionModelCallableKHR : return hlsl::ESS_CALLABLE; + default: + { + return hlsl::ESS_UNKNOWN; + } + } + }; + + static constexpr auto HEADER_SIZE = 5; + + std::vector minimizedSpirv; + core::unordered_set removedEntryPointIds; + + bool needDebloat = false; + auto offset = HEADER_SIZE; + auto parse_instruction = [](uint32_t instruction) -> std::tuple + { + const auto length = instruction >> 16; + 
const auto opcode = instruction & 0x0ffffu; + return { length, opcode }; + }; + + // Keep in mind about this layout while reading all the code below: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#LogicalLayout + + // skip until entry point + while (offset < spirvDwordCount) { + const auto instruction = spirv[offset]; + const auto [length, opcode] = parse_instruction(instruction); + if (opcode == spv::OpEntryPoint) break; + offset += length; + } + + // handle entry points removal + while (offset < spirvDwordCount) { + const auto curOffset = offset; + const auto instruction = spirv[curOffset]; + const auto [length, opcode] = parse_instruction(instruction); + if (opcode != spv::OpEntryPoint) break; + offset += length; + + const auto curExecutionModel = static_cast(spirv[curOffset + 1]); + const auto curEntryPointId = spirv[curOffset + 2]; + const auto curEntryPointName = std::string_view(reinterpret_cast(spirv + curOffset + 3)); + + const auto entryPoint = EntryPoint{ + .name = curEntryPointName, + .stage = getHlslShaderStage(curExecutionModel), + }; + + if (entryPoint.stage == hlsl::ESS_UNKNOWN) + { + logger.log("Found entry point with unsupported execution model in SPIR-V", system::ILogger::ELL_ERROR); + return Result{ + .spirv = nullptr, + .isSuccess = false + }; + } + + auto findEntryPointIt = entryPoints.find(entryPoint); + if (findEntryPointIt != entryPoints.end()) + { + foundEntryPoint += 1; // a valid spirv will have unique entry points, so this should works + } else + { + if (needDebloat == false) + { + minimizedSpirv.reserve(spirvDwordCount); + minimizedSpirv.insert(minimizedSpirv.end(), spirv, spirv + curOffset); + needDebloat = true; + } + removedEntryPointIds.insert(curEntryPointId); + continue; + } + if (!needDebloat) continue; + minimizedSpirv.insert(minimizedSpirv.end(), spirv + curOffset, spirv + offset); + } + + const auto wereAllEntryPointsFound = foundEntryPoint == entryPoints.size(); + if (!wereAllEntryPointsFound) + { + 
logger.log("Some entry point that is requested to be retained is not found in SPIR-V", system::ILogger::ELL_ERROR); + return { + .spirv = nullptr, + .isSuccess = false, + }; + } + + if (!needDebloat) + { + return { + .spirv = nullptr, + .isSuccess = true, + }; + } + + // handle execution model removal + while (offset < spirvDwordCount) + { + const auto curOffset = offset; + const auto instruction = spirv[curOffset]; + const auto [length, opcode] = parse_instruction(instruction); + if (opcode != spv::OpExecutionMode && opcode != spv::OpExecutionModeId) break; + offset += length; + const auto entryPointId = static_cast(spirv[curOffset + 1]); + if (removedEntryPointIds.contains(entryPointId)) + { + continue; + } + minimizedSpirv.insert(minimizedSpirv.end(), spirv + curOffset, spirv + offset); + } + + minimizedSpirv.insert(minimizedSpirv.end(), spirv + offset, spirv + spirvDwordCount); + + assert(validate(minimizedSpirv.data(), minimizedSpirv.size(), logger)); + + auto debloatedSpirv = m_optimizer->optimize(minimizedSpirv.data(), minimizedSpirv.size(), logger); + +#ifdef _NBL_DEBUG + logger.log("Before stripping capabilities:", nbl::system::ILogger::ELL_DEBUG); + printCapabilities(spirv, spirvDwordCount, logger); + logger.log("\n", nbl::system::ILogger::ELL_DEBUG); + + const auto* debloatedSpirvBuffer = static_cast(debloatedSpirv->getPointer()); + const auto debloatedSpirvDwordCount = debloatedSpirv->getSize() / 4; + logger.log("After stripping capabilities:", nbl::system::ILogger::ELL_DEBUG); + printCapabilities(debloatedSpirvBuffer, debloatedSpirvDwordCount, logger); + logger.log("\n", nbl::system::ILogger::ELL_DEBUG); +#endif + + return { + .spirv = std::move(debloatedSpirv), + .isSuccess = true, + }; + +} + diff --git a/src/nbl/asset/utils/ISPIRVOptimizer.cpp b/src/nbl/asset/utils/ISPIRVOptimizer.cpp index 106cdbdfe6..41dd17dae9 100644 --- a/src/nbl/asset/utils/ISPIRVOptimizer.cpp +++ b/src/nbl/asset/utils/ISPIRVOptimizer.cpp @@ -22,27 +22,39 @@ 
nbl::core::smart_refctd_ptr ISPIRVOptimizer::optimize(const uint32_t }; using create_pass_f_t = spvtools::Optimizer::PassToken(*)(); - create_pass_f_t create_pass_f[EOP_COUNT]{ - &spvtools::CreateMergeReturnPass, - &spvtools::CreateInlineExhaustivePass, - &spvtools::CreateEliminateDeadFunctionsPass, - CreateScalarReplacementPass, - &spvtools::CreateLocalSingleBlockLoadStoreElimPass, - &spvtools::CreateLocalSingleStoreElimPass, - &spvtools::CreateSimplificationPass, - &spvtools::CreateVectorDCEPass, - &spvtools::CreateDeadInsertElimPass, - &spvtools::CreateDeadBranchElimPass, - &spvtools::CreateBlockMergePass, - &spvtools::CreateLocalMultiStoreElimPass, - &spvtools::CreateRedundancyEliminationPass, - &spvtools::CreateLoopInvariantCodeMotionPass, - &spvtools::CreateCCPPass, - CreateReduceLoadSizePass, - &spvtools::CreateStrengthReductionPass, - &spvtools::CreateIfConversionPass, - &spvtools::CreateStripDebugInfoPass, - //&spvtools::CreateAggressiveDCEPass + auto getSpirvOptimizerPass = [&](E_OPTIMIZER_PASS pass) -> create_pass_f_t + { + switch (pass) + { + case EOP_MERGE_RETURN: return &spvtools::CreateMergeReturnPass; + case EOP_INLINE: return &spvtools::CreateInlineExhaustivePass; + case EOP_ELIM_DEAD_FUNCTIONS: return &spvtools::CreateEliminateDeadFunctionsPass; + case EOP_ELIM_DEAD_VARIABLES: return &spvtools::CreateDeadVariableEliminationPass; + case EOP_ELIM_DEAD_CONSTANTS: return &spvtools::CreateEliminateDeadConstantPass; + case EOP_ELIM_DEAD_MEMBERS: return &spvtools::CreateEliminateDeadMembersPass; + case EOP_SCALAR_REPLACEMENT: return CreateScalarReplacementPass; + case EOP_LOCAL_SINGLE_BLOCK_LOAD_STORE_ELIM: return &spvtools::CreateLocalSingleBlockLoadStoreElimPass; + case EOP_LOCAL_SINGLE_STORE_ELIM: return &spvtools::CreateLocalSingleStoreElimPass; + case EOP_SIMPLIFICATION: return &spvtools::CreateSimplificationPass; + case EOP_VECTOR_DCE: return &spvtools::CreateVectorDCEPass; + case EOP_DEAD_INSERT_ELIM: return &spvtools::CreateDeadInsertElimPass; + 
case EOP_DEAD_BRANCH_ELIM: return &spvtools::CreateDeadBranchElimPass; + case EOP_BLOCK_MERGE: return &spvtools::CreateBlockMergePass; + case EOP_LOCAL_MULTI_STORE_ELIM: return &spvtools::CreateLocalMultiStoreElimPass; + case EOP_REDUNDANCY_ELIM: return &spvtools::CreateRedundancyEliminationPass; + case EOP_LOOP_INVARIANT_CODE_MOTION: return &spvtools::CreateLoopInvariantCodeMotionPass; + case EOP_CCP: return &spvtools::CreateCCPPass; + case EOP_REDUCE_LOAD_SIZE: return CreateReduceLoadSizePass; + case EOP_STRENGTH_REDUCTION: return &spvtools::CreateStrengthReductionPass; + case EOP_IF_CONVERSION: return &spvtools::CreateIfConversionPass; + case EOP_STRIP_DEBUG_INFO: return &spvtools::CreateStripDebugInfoPass; + case EOP_TRIM_CAPABILITIES: return &spvtools::CreateTrimCapabilitiesPass; + case EOP_AGGRESSIVE_DCE: return &spvtools::CreateAggressiveDCEPass; + case EOP_REMOVE_UNUSED_INTERFACE_VARIABLES: return &spvtools::CreateRemoveUnusedInterfaceVariablesPass; + case EOP_ELIMINATE_DEAD_INPUT_COMPONENTS_SAFE: return &spvtools::CreateEliminateDeadInputComponentsSafePass; + default: + return nullptr; + } }; auto msgConsumer = [&logger](spv_message_level_t level, const char* src, const spv_position_t& pos, const char* msg) @@ -72,8 +84,15 @@ nbl::core::smart_refctd_ptr ISPIRVOptimizer::optimize(const uint32_t spvtools::Optimizer opt(SPIRV_VERSION); - for (E_OPTIMIZER_PASS pass : m_passes) - opt.RegisterPass(create_pass_f[pass]()); + for (E_OPTIMIZER_PASS pass : m_passes) { + if (const auto& spirvPass = getSpirvOptimizerPass(pass); spirvPass != nullptr) + { + opt.RegisterPass(spirvPass()); + } else + { + logger.log("Optimizer pass is unknown or not supported!", system::ILogger::ELL_WARNING); + } + } opt.SetMessageConsumer(msgConsumer); diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index d1515ac214..3164fb4f74 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -23,7 +23,7 @@ 
IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& syste m_defaultIncludeFinder = core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system)); } -core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const +core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const { CCache::SEntry entry; if (options.readCache || options.writeCache) @@ -43,7 +43,7 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(c } } - auto retVal = compileToSPIRV_impl(code, options, options.writeCache ? &entry.dependencies : nullptr); + auto retVal = compileToSPIRV_impl(code, options, options.writeCache ? &entry.dependencies:nullptr); // compute the SPIR-V shader content hash if (retVal) { @@ -259,7 +259,7 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in return {}; } -core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const +core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const { const auto found = find_impl(mainFile, finder); if (found==m_container.end()) @@ -423,7 +423,7 @@ bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu return true; } -core::smart_refctd_ptr nbl::asset::IShaderCompiler::CCache::SEntry::decompressShader() const +core::smart_refctd_ptr nbl::asset::IShaderCompiler::CCache::SEntry::decompressShader() const { auto uncompressedBuf = ICPUBuffer::create({ uncompressedSize }); uncompressedBuf->setContentHash(uncompressedContentHash); @@ -438,5 +438,5 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::CCache::SEntry:: reinterpret_cast(spirv->getPointer()), LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &alloc); assert(res == SZ_OK); - return core::make_smart_refctd_ptr(std::move(uncompressedBuf), 
compilerArgs.stage, IShader::E_CONTENT_TYPE::ECT_SPIRV, compilerArgs.preprocessorArgs.sourceIdentifier.data()); + return core::make_smart_refctd_ptr(std::move(uncompressedBuf), IShader::E_CONTENT_TYPE::ECT_SPIRV, compilerArgs.preprocessorArgs.sourceIdentifier.data()); } diff --git a/src/nbl/ext/FFT/FFT.cpp b/src/nbl/ext/FFT/FFT.cpp deleted file mode 100644 index 746fc1801f..0000000000 --- a/src/nbl/ext/FFT/FFT.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/FFT/FFT.h" -#include "../../../../source/Nabla/COpenGLExtensionHandler.h" - -#include - -using namespace nbl; -using namespace nbl::asset; -using namespace nbl::video; -using namespace ext::FFT; - -FFT::FFT(IDriver* driver, uint32_t maxDimensionSize, bool useHalfStorage) : m_maxFFTLen(core::roundUpToPoT(maxDimensionSize)), m_halfFloatStorage(useHalfStorage) -{ - // TODO: cache layouts using asset mgr or something - static IGPUDescriptorSetLayout::SBinding bnd[] = - { - { - 0u, - EDT_STORAGE_BUFFER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - { - 1u, - EDT_STORAGE_BUFFER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - }; - m_dsLayout = driver->createDescriptorSetLayout(bnd,bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)); - - auto pcRange = getDefaultPushConstantRanges(); - m_pplnLayout = driver->createPipelineLayout(pcRange.begin(),pcRange.end(),core::smart_refctd_ptr(m_dsLayout)); - - if (m_maxFFTLen < MINIMUM_FFT_SIZE) - m_maxFFTLen = MINIMUM_FFT_SIZE; - - const char* sourceFmt = -R"===(#version 430 core - -#define _NBL_GLSL_WORKGROUP_SIZE_ %u -#define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ %u -#define _NBL_GLSL_EXT_FFT_HALF_STORAGE_ %u - -layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; -#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp" - 
-)==="; - - constexpr size_t extraSize = 8u*2u+1u; - - auto source = ICPUBuffer::create({ strlen(sourceFmt)+extraSize+1u }); - snprintf( - reinterpret_cast(source->getPointer()),source->getSize(), sourceFmt, - DEFAULT_WORK_GROUP_SIZE, - m_maxFFTLen, - useHalfStorage ? 1u:0u - ); - - auto shader = driver->createShader(core::make_smart_refctd_ptr(std::move(source),asset::ICPUShader::buffer_contains_glsl)); - - auto specializedShader = driver->createSpecializedShader( - shader.get(), - ISpecializedShader::SInfo{nullptr, nullptr, "main", ISpecializedShader::ESS_COMPUTE} - ); - - m_ppln = driver->createComputePipeline(nullptr,core::smart_refctd_ptr(m_pplnLayout),std::move(specializedShader)); -} - -core::SRange FFT::getDefaultPushConstantRanges() -{ - static const SPushConstantRange ranges[1] = - { - { - ISpecializedShader::ESS_COMPUTE, - 0u, - sizeof(Parameters_t) - }, - }; - return {ranges,ranges+1}; -} - -void FFT::updateDescriptorSet( - video::IVideoDriver * driver, - video::IGPUDescriptorSet * set, - core::smart_refctd_ptr inputBufferDescriptor, - core::smart_refctd_ptr outputBufferDescriptor) -{ - constexpr uint32_t MAX_DESCRIPTOR_COUNT = 2u; - video::IGPUDescriptorSet::SDescriptorInfo pInfos[MAX_DESCRIPTOR_COUNT]; - video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[MAX_DESCRIPTOR_COUNT]; - - for (auto i=0; i< MAX_DESCRIPTOR_COUNT; i++) - { - pWrites[i].dstSet = set; - pWrites[i].arrayElement = 0u; - pWrites[i].count = 1u; - pWrites[i].info = pInfos+i; - } - - // Input Buffer - pWrites[0].binding = 0; - pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER; - pWrites[0].count = 1; - pInfos[0].desc = inputBufferDescriptor; - pInfos[0].buffer.size = inputBufferDescriptor->getSize(); - pInfos[0].buffer.offset = 0u; - - // Output Buffer - pWrites[1].binding = 1; - pWrites[1].descriptorType = asset::EDT_STORAGE_BUFFER; - pWrites[1].count = 1; - pInfos[1].desc = outputBufferDescriptor; - pInfos[1].buffer.size = outputBufferDescriptor->getSize(); - 
pInfos[1].buffer.offset = 0u; - - driver->updateDescriptorSets(2u, pWrites, 0u, nullptr); -} - -void FFT::defaultBarrier() -{ - COpenGLExtensionHandler::pGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); -} diff --git a/src/nbl/ext/ImGui/ImGui.cpp b/src/nbl/ext/ImGui/ImGui.cpp index 58e30285fb..b40c7155be 100644 --- a/src/nbl/ext/ImGui/ImGui.cpp +++ b/src/nbl/ext/ImGui/ImGui.cpp @@ -151,7 +151,7 @@ core::smart_refctd_ptr UI::createPipeline(SCreation struct { - smart_refctd_ptr vertex, fragment; + smart_refctd_ptr vertex, fragment; } shaders; { @@ -164,7 +164,7 @@ core::smart_refctd_ptr UI::createPipeline(SCreation auto includeLoader = includeFinder->getDefaultFileSystemLoader(); includeFinder->addSearchPath(NBL_ARCHIVE_ENTRY.data(), includeLoader); - auto createShader = [&]() -> smart_refctd_ptr + auto createShader = [&]() -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams params = {}; params.logger = creationParams.utilities->getLogger(); @@ -179,7 +179,7 @@ core::smart_refctd_ptr UI::createPipeline(SCreation return nullptr; } - const auto shader = IAsset::castDown(assets[0]); + const auto shader = IAsset::castDown(assets[0]); CHLSLCompiler::SOptions options = {}; options.stage = stage; @@ -187,59 +187,59 @@ core::smart_refctd_ptr UI::createPipeline(SCreation options.preprocessorOptions.logger = creationParams.utilities->getLogger(); options.preprocessorOptions.includeFinder = includeFinder.get(); - auto compileToSPIRV = [&]() -> smart_refctd_ptr - { - auto toOptions = [](const std::array& in) // options must be alive till compileToSPIRV ends + auto compileToSPIRV = [&]() -> smart_refctd_ptr { - const auto required = CHLSLCompiler::getRequiredArguments(); - std::array options; + auto toOptions = [](const std::array&in) // options must be alive till compileToSPIRV ends + { + const auto required = CHLSLCompiler::getRequiredArguments(); + std::array options; - std::wstring_convert> converter; - for (uint32_t i = 0; i < required.size(); ++i) - options[i] = 
converter.to_bytes(required[i]); // meh + std::wstring_convert> converter; + for (uint32_t i = 0; i < required.size(); ++i) + options[i] = converter.to_bytes(required[i]); // meh - uint32_t offset = required.size(); - for (const auto& opt : in) - options[offset++] = std::string(opt); + uint32_t offset = required.size(); + for (const auto& opt : in) + options[offset++] = std::string(opt); - return options; - }; + return options; + }; - const std::string_view code (reinterpret_cast(shader->getContent()->getPointer()), shader->getContent()->getSize()); + const std::string_view code(reinterpret_cast(shader->getContent()->getPointer()), shader->getContent()->getSize()); - if constexpr (stage == IShader::E_SHADER_STAGE::ESS_VERTEX) - { - const auto VERTEX_COMPILE_OPTIONS = toOptions(std::to_array({ "-T", "vs_6_7", "-E", "VSMain", "-O3" })); - options.dxcOptions = VERTEX_COMPILE_OPTIONS; + if constexpr (stage == IShader::E_SHADER_STAGE::ESS_VERTEX) + { + const auto VERTEX_COMPILE_OPTIONS = toOptions(std::to_array({ "-T", "vs_6_7", "-E", "VSMain", "-O3" })); + options.dxcOptions = VERTEX_COMPILE_OPTIONS; - return compiler->compileToSPIRV(code.data(), options); // we good here - no code patching - } - else if (stage == IShader::E_SHADER_STAGE::ESS_FRAGMENT) - { - const auto FRAGMENT_COMPILE_OPTIONS = toOptions(std::to_array({ "-T", "ps_6_7", "-E", "PSMain", "-O3" })); - options.dxcOptions = FRAGMENT_COMPILE_OPTIONS; - - std::stringstream stream; - - // TODO: Use the `ConstevalBindingInfo` - stream << "// -> this code has been autogenerated with Nabla ImGUI extension\n" - << "#define NBL_TEXTURES_BINDING_IX " << creationParams.resources.texturesInfo.bindingIx << "\n" - << "#define NBL_SAMPLER_STATES_BINDING_IX " << creationParams.resources.samplersInfo.bindingIx << "\n" - << "#define NBL_TEXTURES_SET_IX " << creationParams.resources.texturesInfo.setIx << "\n" - << "#define NBL_SAMPLER_STATES_SET_IX " << creationParams.resources.samplersInfo.setIx << "\n" - << "#define 
NBL_TEXTURES_COUNT " << creationParams.resources.texturesCount << "\n" - << "#define NBL_SAMPLERS_COUNT " << creationParams.resources.samplersCount << "\n" - << "// <-\n\n"; - - const auto newCode = stream.str() + std::string(code); - return compiler->compileToSPIRV(newCode.c_str(), options); // but here we do patch the code with additional define directives for which values are taken from the creation parameters - } - else - { - static_assert(stage != IShader::E_SHADER_STAGE::ESS_UNKNOWN, "Unknown shader stage!"); - return nullptr; - } - }; + return compiler->compileToSPIRV(code.data(), options); // we good here - no code patching + } + else if (stage == IShader::E_SHADER_STAGE::ESS_FRAGMENT) + { + const auto FRAGMENT_COMPILE_OPTIONS = toOptions(std::to_array({ "-T", "ps_6_7", "-E", "PSMain", "-O3" })); + options.dxcOptions = FRAGMENT_COMPILE_OPTIONS; + + std::stringstream stream; + + // TODO: Use the `ConstevalBindingInfo` + stream << "// -> this code has been autogenerated with Nabla ImGUI extension\n" + << "#define NBL_TEXTURES_BINDING_IX " << creationParams.resources.texturesInfo.bindingIx << "\n" + << "#define NBL_SAMPLER_STATES_BINDING_IX " << creationParams.resources.samplersInfo.bindingIx << "\n" + << "#define NBL_TEXTURES_SET_IX " << creationParams.resources.texturesInfo.setIx << "\n" + << "#define NBL_SAMPLER_STATES_SET_IX " << creationParams.resources.samplersInfo.setIx << "\n" + << "#define NBL_TEXTURES_COUNT " << creationParams.resources.texturesCount << "\n" + << "#define NBL_SAMPLERS_COUNT " << creationParams.resources.samplersCount << "\n" + << "// <-\n\n"; + + const auto newCode = stream.str() + std::string(code); + return compiler->compileToSPIRV(newCode.c_str(), options); // but here we do patch the code with additional define directives for which values are taken from the creation parameters + } + else + { + static_assert(stage != IShader::E_SHADER_STAGE::ESS_UNKNOWN, "Unknown shader stage!"); + return nullptr; + } + }; auto spirv = 
compileToSPIRV(); @@ -249,7 +249,7 @@ core::smart_refctd_ptr UI::createPipeline(SCreation return nullptr; } - auto gpu = creationParams.utilities->getLogicalDevice()->createShader(spirv.get()); + auto gpu = creationParams.utilities->getLogicalDevice()->compileShader({.source = spirv.get(),}); if (!gpu) creationParams.utilities->getLogger()->log("Could not create GPU shader for \"%s\"!", ILogger::ELL_ERROR, key.value); @@ -342,10 +342,10 @@ core::smart_refctd_ptr UI::createPipeline(SCreation core::smart_refctd_ptr pipeline; { - const IGPUShader::SSpecInfo specs[] = + const IPipelineBase::SShaderSpecInfo specs[] = { - { .entryPoint = "VSMain", .shader = shaders.vertex.get() }, - { .entryPoint = "PSMain", .shader = shaders.fragment.get() } + {.shader = shaders.vertex.get(), .entryPoint = "VSMain", .stage = hlsl::ShaderStage::ESS_VERTEX}, + {.shader = shaders.fragment.get(), .entryPoint = "PSMain", .stage = hlsl::ShaderStage::ESS_FRAGMENT} }; IGPUGraphicsPipeline::SCreationParams params[1]; diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index b569a5fde2..40b30287ed 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -470,7 +470,7 @@ bool CVulkanCommandBuffer::bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_ return true; } -bool CVulkanCommandBuffer::pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) +bool CVulkanCommandBuffer::pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) { getFunctionTable().vkCmdPushConstants(m_cmdbuf,static_cast(layout)->getInternalObject(),getVkShaderStageFlagsFromShaderStage(stageFlags),offset,size,pValues); return true; diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 
634d8c4f2b..99b1c15644 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -185,7 +185,7 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) override; bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) override; bool bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* const dynamicOffsets = nullptr) override; - bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) override; + bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) override; bool bindVertexBuffers_impl(const uint32_t firstBinding, const uint32_t bindingCount, const asset::SBufferBinding* const pBindings) override; bool bindIndexBuffer_impl(const asset::SBufferBinding& binding, const asset::E_INDEX_TYPE indexType) override; diff --git a/src/nbl/video/CVulkanComputePipeline.h b/src/nbl/video/CVulkanComputePipeline.h index 666e2869d9..76fb346e30 100644 --- a/src/nbl/video/CVulkanComputePipeline.h +++ b/src/nbl/video/CVulkanComputePipeline.h @@ -4,8 +4,6 @@ #include "nbl/video/IGPUComputePipeline.h" -#include "nbl/video/CVulkanShader.h" - #include @@ -18,11 +16,9 @@ class CVulkanComputePipeline final : public IGPUComputePipeline public: CVulkanComputePipeline( core::smart_refctd_ptr&& _layout, - core::smart_refctd_ptr&& _shader, const core::bitflag _flags, const VkPipeline pipeline - ) : IGPUComputePipeline(std::move(_layout),_flags), - m_pipeline(pipeline), m_shader(std::move(_shader)) {} + ) : 
IGPUComputePipeline(std::move(_layout),_flags), m_pipeline(pipeline) {} inline const void* getNativeHandle() const override { return &m_pipeline; } @@ -34,8 +30,6 @@ class CVulkanComputePipeline final : public IGPUComputePipeline ~CVulkanComputePipeline(); const VkPipeline m_pipeline; - // gotta keep that VkShaderModule alive (for now, until maintenance5) - const core::smart_refctd_ptr m_shader; }; } #endif \ No newline at end of file diff --git a/src/nbl/video/CVulkanGraphicsPipeline.h b/src/nbl/video/CVulkanGraphicsPipeline.h index c4c58c603e..1b99e58dd6 100644 --- a/src/nbl/video/CVulkanGraphicsPipeline.h +++ b/src/nbl/video/CVulkanGraphicsPipeline.h @@ -4,8 +4,6 @@ #include "nbl/video/IGPUGraphicsPipeline.h" -#include "nbl/video/CVulkanShader.h" - namespace nbl::video { @@ -14,15 +12,7 @@ class CVulkanGraphicsPipeline final : public IGPUGraphicsPipeline { public: CVulkanGraphicsPipeline(const SCreationParams& params, const VkPipeline vk_pipeline) : - IGPUGraphicsPipeline(params), m_vkPipeline(vk_pipeline) - { - for (const auto& info : params.shaders) - if (info.shader) - { - const auto stageIx = hlsl::findLSB(info.shader->getStage()); - m_shaders[stageIx] = core::smart_refctd_ptr(static_cast(info.shader)); - } - } + IGPUGraphicsPipeline(params), m_vkPipeline(vk_pipeline) {} inline const void* getNativeHandle() const override {return &m_vkPipeline;} @@ -32,8 +22,6 @@ class CVulkanGraphicsPipeline final : public IGPUGraphicsPipeline ~CVulkanGraphicsPipeline(); const VkPipeline m_vkPipeline; - // gotta keep those VkShaderModules alive (for now) - core::smart_refctd_ptr m_shaders[GRAPHICS_SHADER_STAGE_COUNT]; }; } diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 2e30a18269..792ab719eb 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1,5 +1,6 @@ #include "nbl/video/CVulkanLogicalDevice.h" +#include "nbl/asset/utils/ISPIRVDebloater.h" #include 
"nbl/video/CThreadSafeQueueAdapter.h" #include "nbl/video/surface/CSurfaceVulkan.h" @@ -11,8 +12,6 @@ using namespace nbl; using namespace nbl::video; - - CVulkanLogicalDevice::CVulkanLogicalDevice(core::smart_refctd_ptr&& api, renderdoc_api_t* const rdoc, const IPhysicalDevice* const physicalDevice, const VkDevice vkdev, const SCreationParams& params) : ILogicalDevice(std::move(api),physicalDevice,params,rdoc), m_vkdev(vkdev), m_devf(vkdev), m_deferred_op_mempool(NODES_PER_BLOCK_DEFERRED_OP*sizeof(CVulkanDeferredOperation),1u,MAX_BLOCK_COUNT_DEFERRED_OP,static_cast(sizeof(CVulkanDeferredOperation))) @@ -518,22 +517,6 @@ auto CVulkanLogicalDevice::copyAccelerationStructureFromMemory_impl(IDeferredOpe } -core::smart_refctd_ptr CVulkanLogicalDevice::createShader_impl(const asset::ICPUShader* spirvShader) -{ - auto spirv = spirvShader->getContent(); - - VkShaderModuleCreateInfo vk_createInfo = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO }; - vk_createInfo.pNext = nullptr; - vk_createInfo.flags = static_cast(0u); // reserved for future use by Vulkan - vk_createInfo.codeSize = spirv->getSize(); - vk_createInfo.pCode = static_cast(spirv->getPointer()); - - VkShaderModule vk_shaderModule; - if (m_devf.vk.vkCreateShaderModule(m_vkdev,&vk_createInfo,nullptr,&vk_shaderModule)==VK_SUCCESS) - return core::make_smart_refctd_ptr(this,spirvShader->getStage(),std::string(spirvShader->getFilepathHint()),vk_shaderModule); - return nullptr; -} - core::smart_refctd_ptr CVulkanLogicalDevice::createDescriptorSetLayout_impl(const std::span bindings, const uint32_t maxSamplersCount) { @@ -1050,9 +1033,11 @@ core::smart_refctd_ptr CVulkanLogicalDevice::createFramebuffer_ return nullptr; } - +// TODO: Change this to pass SPIR-V directly! 
VkPipelineShaderStageCreateInfo getVkShaderStageCreateInfoFrom( - const IGPUShader::SSpecInfo& specInfo, + const asset::IPipelineBase::SShaderSpecInfo& specInfo, + VkShaderModuleCreateInfo* &outShaderModule, + std::string* &outEntryPoints, VkPipelineShaderStageRequiredSubgroupSizeCreateInfo* &outRequiredSubgroupSize, VkSpecializationInfo* &outSpecInfo, VkSpecializationMapEntry* &outSpecMapEntry, uint8_t* &outSpecData ) @@ -1066,34 +1051,20 @@ VkPipelineShaderStageCreateInfo getVkShaderStageCreateInfoFrom( // provide tight control over the cache handles making sure you hit it (what the `FAIL_ON_PIPELINE_COMPILE_REQUIRED` flag is for), // in that case you have a `VkPipelineShaderStageModuleIdentifier` in `pNext` with non-zero length identifier. // TL;DR Basically you can skip needing the SPIR-V contents to hash the IGPUShader, we may implement this later on. - void** ppNext = const_cast(&retval.pNext); // TODO: VkShaderModuleValidationCacheCreateInfoEXT from VK_EXT_validation_cache // TODO: VkPipelineRobustnessCreateInfoEXT from VK_EXT_pipeline_robustness (allows per-pipeline control of robustness) - // Implicit: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02754 - using subgroup_size_t = std::remove_reference_t::SUBGROUP_SIZE; - if (specInfo.requiredSubgroupSize>=subgroup_size_t::REQUIRE_4) - { - *ppNext = outRequiredSubgroupSize; - ppNext = &outRequiredSubgroupSize->pNext; - outRequiredSubgroupSize->requiredSubgroupSize = 0x1u<(specInfo.requiredSubgroupSize); - outRequiredSubgroupSize++; - } - else if (specInfo.requiredSubgroupSize==subgroup_size_t::VARYING) - retval.flags = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT; - else - retval.flags = 0; + const auto stage = specInfo.stage; + + (*outEntryPoints) = specInfo.entryPoint; + const auto entryPointName = outEntryPoints->c_str(); + outEntryPoints++; - const auto stage = 
specInfo.shader->getStage(); - if (specInfo.requireFullSubgroups) - { - assert(stage==IGPUShader::E_SHADER_STAGE::ESS_COMPUTE/*TODO: Or Mesh Or Task*/); - retval.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; - } // Implicit: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00706 retval.stage = static_cast(stage); - retval.module = static_cast(specInfo.shader)->getInternalObject(); - retval.pName = specInfo.entryPoint.c_str(); + retval.module = VK_NULL_HANDLE; + retval.pName = entryPointName; + outSpecInfo->pMapEntries = outSpecMapEntry; outSpecInfo->dataSize = 0; const uint8_t* const specDataBegin = outSpecData; @@ -1115,6 +1086,35 @@ VkPipelineShaderStageCreateInfo getVkShaderStageCreateInfoFrom( outSpecInfo->mapEntryCount = 0; outSpecInfo->dataSize = std::distance(specDataBegin,outSpecData); retval.pSpecializationInfo = outSpecInfo++; + + + auto ppNext = &retval.pNext; + + const auto* spirv = specInfo.shader->getContent(); + outShaderModule->codeSize = spirv->getSize(); + outShaderModule->pCode = static_cast(spirv->getPointer()); + *ppNext = outShaderModule; + ppNext = &outShaderModule->pNext; + outShaderModule++; + + // Implicit: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02754 + using subgroup_size_t = std::remove_reference_t::SUBGROUP_SIZE; + if (specInfo.requiredSubgroupSize>=subgroup_size_t::REQUIRE_4) + { + *ppNext = outRequiredSubgroupSize; + outRequiredSubgroupSize->requiredSubgroupSize = 0x1u<(specInfo.requiredSubgroupSize); + outRequiredSubgroupSize++; + } + else if (specInfo.requiredSubgroupSize==subgroup_size_t::VARYING) + retval.flags = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT; + else + retval.flags = 0; + + if (specInfo.requireFullSubgroups) + { + assert(stage==hlsl::ShaderStage::ESS_COMPUTE/*TODO: Or Mesh 
Or Task*/); + retval.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; + } } return retval; } @@ -1148,6 +1148,8 @@ void CVulkanLogicalDevice::createComputePipelines_impl( // pNext can only be VkComputePipelineIndirectBufferInfoNV, creation feedback, robustness and VkPipelineCreateFlags2CreateInfoKHR core::vector vk_createInfos(createInfos.size(),{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,nullptr}); + core::vector vk_shaderModule(createInfos.size(), {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); + core::vector entryPoints(createInfos.size()); core::vector vk_requiredSubgroupSize(createInfos.size(),{ VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr }); @@ -1156,6 +1158,8 @@ void CVulkanLogicalDevice::createComputePipelines_impl( core::vector specializationData(validation.dataSize); auto outCreateInfo = vk_createInfos.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outEntryPoints = entryPoints.data(); auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); auto outSpecInfo = vk_specializationInfos.data(); auto outSpecMapEntry = vk_specializationMapEntry.data(); @@ -1163,10 +1167,12 @@ void CVulkanLogicalDevice::createComputePipelines_impl( for (const auto& info : createInfos) { initPipelineCreateInfo(outCreateInfo,info); - outCreateInfo->stage = getVkShaderStageCreateInfoFrom(info.shader,outRequiredSubgroupSize,outSpecInfo,outSpecMapEntry,outSpecData); + const auto& spec = info.shader; + outCreateInfo->stage = getVkShaderStageCreateInfoFrom(spec, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); outCreateInfo++; } auto vk_pipelines = reinterpret_cast(output); + std::stringstream debugNameBuilder; if (m_devf.vk.vkCreateComputePipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) { for (size_t i=0ull; i( core::smart_refctd_ptr(info.layout), - 
core::smart_refctd_ptr(static_cast(info.shader.shader)), info.flags,vk_pipeline ); + debugNameBuilder.str(""); + const auto& specInfo = createInfos[i].shader; + debugNameBuilder << specInfo.shader->getFilepathHint() << "(" << specInfo.entryPoint << "," << specInfo.stage << ")\n"; } } else @@ -1240,6 +1248,8 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( const auto maxShaderStages = createInfos.size()*IGPUGraphicsPipeline::GRAPHICS_SHADER_STAGE_COUNT; core::vector vk_shaderStage(maxShaderStages,{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr}); + core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); + core::vector entryPoints(maxShaderStages); core::vector vk_requiredSubgroupSize(maxShaderStages,{ VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr }); @@ -1268,6 +1278,8 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( auto outCreateInfo = vk_createInfos.data(); auto outShaderStage = vk_shaderStage.data(); + auto outEntryPoints = entryPoints.data(); + auto outShaderModule = vk_shaderModule.data(); auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); auto outSpecInfo = vk_specializationInfos.data(); auto outSpecMapEntry = vk_specializationMapEntry.data(); @@ -1283,14 +1295,19 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( auto outDepthStencil = vk_depthStencilStates.data(); auto outColorBlend = vk_colorBlendStates.data(); auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); + for (const auto& info : createInfos) { initPipelineCreateInfo(outCreateInfo,info); outCreateInfo->pStages = outShaderStage; for (const auto& spec : info.shaders) - if (spec.shader) - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec,outRequiredSubgroupSize,outSpecInfo,outSpecMapEntry,outSpecData); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); + { + if (spec.shader) + { + 
*(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + } + } // when dealing with mesh shaders, the vertex input and assembly state will be null { { @@ -1328,8 +1345,8 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( for (const auto& spec : info.shaders) if (spec.shader) { - const auto stage = spec.shader->getStage(); - if (stage==IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL || stage==IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION) + const auto stage = spec.stage; + if (stage==hlsl::ShaderStage::ESS_TESSELLATION_CONTROL || stage==hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION) { outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; outCreateInfo->pTessellationState = outTessellation++; @@ -1409,8 +1426,8 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( outCreateInfo->subpass = info.cached.subpassIx; outCreateInfo++; } - auto vk_pipelines = reinterpret_cast(output); + std::stringstream debugNameBuilder; if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) { for (size_t i=0ull; i(createInfos[i],vk_pipeline); + debugNameBuilder.str(""); + for (const auto& shader: createInfos[i].shaders) + { + if (shader.shader != nullptr) + debugNameBuilder <getFilepathHint() << "(" << shader.entryPoint << "," << shader.stage << ")\n"; + } + output[i]->setObjectDebugName(debugNameBuilder.str().c_str()); } } else @@ -1454,6 +1478,8 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( for (const auto& info : createInfos) maxShaderGroups += info.shaderGroups.getShaderGroupCount(); core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,nullptr }); + core::vector 
vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); + core::vector entryPoints(maxShaderStages); core::vector vk_requiredSubgroupSize(maxShaderStages,{ VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr }); @@ -1464,6 +1490,8 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( core::vector specializationData(validation.dataSize); auto outCreateInfo = vk_createInfos.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outEntryPoints = entryPoints.data(); auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); auto outShaderStage = vk_shaderStage.data(); auto outShaderGroup = vk_shaderGroup.data(); @@ -1502,7 +1530,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( outCreateInfo->pStages = outShaderStage; for (const auto& specInfo : info.shaders) { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(specInfo,outRequiredSubgroupSize,outSpecInfo,outSpecMapEntry,outSpecData); + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(specInfo, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo,outSpecMapEntry,outSpecData); } outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages,outShaderStage); assert(outCreateInfo->stageCount != 0); diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 3ed5e9983a..93d45dcc32 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -15,7 +15,6 @@ #include "nbl/video/CVulkanImageView.h" #include "nbl/video/CVulkanFramebuffer.h" #include "nbl/video/CVulkanSemaphore.h" -#include "nbl/video/CVulkanShader.h" #include "nbl/video/CVulkanCommandPool.h" #include "nbl/video/CVulkanDescriptorSetLayout.h" #include "nbl/video/CVulkanSampler.h" @@ -33,7 +32,6 @@ #include "nbl/video/CVulkanGraphicsPipeline.h" #include "nbl/video/CVulkanRayTracingPipeline.h" - namespace nbl::video { @@ -269,9 +267,6 @@ class 
CVulkanLogicalDevice final : public ILogicalDevice DEFERRABLE_RESULT copyAccelerationStructureToMemory_impl(IDeferredOperation* const deferredOperation, const IGPUAccelerationStructure::HostCopyToMemoryInfo& copyInfo) override; DEFERRABLE_RESULT copyAccelerationStructureFromMemory_impl(IDeferredOperation* const deferredOperation, const IGPUAccelerationStructure::HostCopyFromMemoryInfo& copyInfo) override; - // shaders - core::smart_refctd_ptr createShader_impl(const asset::ICPUShader* spirvShader) override; - // layouts core::smart_refctd_ptr createDescriptorSetLayout_impl(const std::span bindings, const uint32_t maxSamplersCount) override; core::smart_refctd_ptr createPipelineLayout_impl( @@ -326,6 +321,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice memory_pool_mt_t m_deferred_op_mempool; VkDescriptorSetLayout m_dummyDSLayout; + }; } diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 3b7df3a9dd..3b1a3b96cc 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -357,7 +357,7 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart properties.deviceLUIDValid = vulkan11Properties.deviceLUIDValid; properties.limits.subgroupSize = vulkan11Properties.subgroupSize; - properties.limits.subgroupOpsShaderStages = static_cast(vulkan11Properties.subgroupSupportedStages); + properties.limits.subgroupOpsShaderStages = static_cast(vulkan11Properties.subgroupSupportedStages); // ROADMAP 2022 would also like ARITHMETIC and QUAD constexpr uint32_t NablaSubgroupOperationMask = VK_SUBGROUP_FEATURE_BASIC_BIT|VK_SUBGROUP_FEATURE_VOTE_BIT|VK_SUBGROUP_FEATURE_BALLOT_BIT|VK_SUBGROUP_FEATURE_SHUFFLE_BIT|VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; if ((vulkan11Properties.subgroupSupportedOperations&NablaSubgroupOperationMask)!=NablaSubgroupOperationMask) diff --git a/src/nbl/video/CVulkanRayTracingPipeline.cpp b/src/nbl/video/CVulkanRayTracingPipeline.cpp index 
0db3ca94ed..a107d3bbed 100644 --- a/src/nbl/video/CVulkanRayTracingPipeline.cpp +++ b/src/nbl/video/CVulkanRayTracingPipeline.cpp @@ -15,15 +15,11 @@ namespace nbl::video ShaderGroupHandleContainer&& shaderGroupHandles) : IGPURayTracingPipeline(params), m_vkPipeline(vk_pipeline), - m_shaders(core::make_refctd_dynamic_array(params.shaders.size())), m_missStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.misses.size())), m_hitGroupStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.hits.size())), m_callableStackSizes(core::make_refctd_dynamic_array(params.shaderGroups.hits.size())), m_shaderGroupHandles(std::move(shaderGroupHandles)) { - for (size_t shaderIx = 0; shaderIx < params.shaders.size(); shaderIx++) - m_shaders->operator[](shaderIx) = ShaderRef(static_cast(params.shaders[shaderIx].shader)); - const auto* vulkanDevice = static_cast(getOriginDevice()); auto* vk = vulkanDevice->getFunctionTable(); diff --git a/src/nbl/video/CVulkanShader.cpp b/src/nbl/video/CVulkanShader.cpp deleted file mode 100644 index acf2b4aa3d..0000000000 --- a/src/nbl/video/CVulkanShader.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "nbl/video/CVulkanShader.h" - -#include "nbl/video/CVulkanLogicalDevice.h" - -namespace nbl::video -{ - -CVulkanShader::~CVulkanShader() -{ - const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); - auto* vk = vulkanDevice->getFunctionTable(); - vk->vk.vkDestroyShaderModule(vulkanDevice->getInternalObject(), m_vkShaderModule, nullptr); -} - -} \ No newline at end of file diff --git a/src/nbl/video/CVulkanShader.h b/src/nbl/video/CVulkanShader.h deleted file mode 100644 index 979c1caa61..0000000000 --- a/src/nbl/video/CVulkanShader.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _NBL_VIDEO_C_VULKAN_SHADER_H_INCLUDED_ -#define _NBL_VIDEO_C_VULKAN_SHADER_H_INCLUDED_ - - -#include "nbl/video/IGPUShader.h" - - -namespace nbl::video -{ - -class ILogicalDevice; - -class CVulkanShader : public IGPUShader -{ - public: - 
CVulkanShader(const ILogicalDevice* dev, const E_SHADER_STAGE stage, std::string&& filepathHint, const VkShaderModule vk_shaderModule) : - IGPUShader(core::smart_refctd_ptr(dev), stage, std::move(filepathHint)), m_vkShaderModule(vk_shaderModule) {} - - inline VkShaderModule getInternalObject() const { return m_vkShaderModule; } - - private: - ~CVulkanShader(); - - VkShaderModule m_vkShaderModule = VK_NULL_HANDLE; - -}; - -} -#endif diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index fcf55b74c1..3e776782fc 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1081,7 +1081,7 @@ bool IGPUCommandBuffer::bindDescriptorSets( return bindDescriptorSets_impl(pipelineBindPoint, layout, firstSet, descriptorSetCount, pDescriptorSets, dynamicOffsetCount, dynamicOffsets); } -bool IGPUCommandBuffer::pushConstants(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) +bool IGPUCommandBuffer::pushConstants(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT|queue_flags_t::GRAPHICS_BIT)) return false; diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index d951deeb3d..26cfc4c6a8 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -7,10 +7,50 @@ using namespace nbl; using namespace nbl::video; +static void debloatShaders(const asset::ISPIRVDebloater& debloater, std::span shaderSpecs, core::vector>& outShaders, asset::IPipelineBase::SShaderSpecInfo* outShaderSpecInfos, system::logger_opt_ptr logger = nullptr) +{ + using EntryPoints = core::set; + core::map entryPointsMap; + + // collect all entry points first before we debloat + for (const auto& shaderSpec : shaderSpecs) { + const auto* shader = 
shaderSpec.shader; + auto it = entryPointsMap.find(shader); + if (it == entryPointsMap.end() || it->first != shader) + it = entryPointsMap.emplace_hint(it, shader, EntryPoints()); + it->second.insert({ .name = shaderSpec.entryPoint, .stage = shaderSpec.stage }); + } + + core::map debloatedShaders; + for (const auto& shaderSpec: shaderSpecs) + { + const auto* shader = shaderSpec.shader; + const auto& entryPoints = entryPointsMap[shader]; + + auto debloatedShaderSpec = shaderSpec; + if (shader != nullptr) + { + if (!debloatedShaders.contains(shader)) + { + const auto outShadersData = outShaders.data(); + outShaders.push_back(debloater.debloat(shader, entryPoints, logger)); + assert(outShadersData == outShaders.data()); + debloatedShaders.emplace(shader, outShaders.back().get()); + } + const auto debloatedShader = debloatedShaders[shader]; + debloatedShaderSpec.shader = debloatedShader; + } + *outShaderSpecInfos = debloatedShaderSpec; + + outShaderSpecInfos++; + } + +} ILogicalDevice::ILogicalDevice(core::smart_refctd_ptr&& api, const IPhysicalDevice* const physicalDevice, const SCreationParams& params, const bool runningInRenderdoc) : m_api(api), m_physicalDevice(physicalDevice), m_enabledFeatures(params.featuresToEnable), m_compilerSet(params.compilerSet), - m_logger(m_physicalDevice->getDebugCallback() ? m_physicalDevice->getDebugCallback()->getLogger() : nullptr) + m_logger(m_physicalDevice->getDebugCallback() ? 
m_physicalDevice->getDebugCallback()->getLogger() : nullptr), + m_spirvDebloater(core::make_smart_refctd_ptr()) { { uint32_t qcnt = 0u; @@ -274,96 +314,41 @@ core::smart_refctd_ptr ILogicalDevice::createBufferView(const as } -core::smart_refctd_ptr ILogicalDevice::compileShader(const SShaderCreationParameters& creationParams) +core::smart_refctd_ptr ILogicalDevice::compileShader(const SShaderCreationParameters& creationParams) { - if (!creationParams.cpushader) + const auto source = creationParams.source; + if (!source) { - NBL_LOG_ERROR("No valid CPU Shader supplied"); - return nullptr; - } - - const asset::IShader::E_SHADER_STAGE shaderStage = creationParams.cpushader->getStage(); - const auto& features = getEnabledFeatures(); - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00704 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00705 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-02091 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-02092 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00706 - switch (shaderStage) - { - case IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION: - if (!features.tessellationShader) - { - NBL_LOG_ERROR("Cannot create IGPUShader for %p, Tessellation Shader feature not enabled!", creationParams.cpushader); - return nullptr; - } - break; - case IGPUShader::E_SHADER_STAGE::ESS_GEOMETRY: - if (!features.geometryShader) - { - NBL_LOG_ERROR("Cannot 
create IGPUShader for %p, Geometry Shader feature not enabled!", creationParams.cpushader); - return nullptr; - } - break; - case IGPUShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_VERTEX: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_FRAGMENT: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_COMPUTE: - break; - // unsupported yet - case IGPUShader::E_SHADER_STAGE::ESS_TASK: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_MESH: - NBL_LOG_ERROR("Unsupported (yet) shader stage"); + NBL_LOG_ERROR("No valid Source Shader supplied"); return nullptr; - break; - case IGPUShader::E_SHADER_STAGE::ESS_RAYGEN: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_ANY_HIT: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_MISS: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_INTERSECTION: [[fallthrough]]; - case IGPUShader::E_SHADER_STAGE::ESS_CALLABLE: - if (!features.rayTracingPipeline) - { - NBL_LOG_ERROR("Cannot create IGPUShader for %p, Raytracing Pipeline feature not enabled!", creationParams.cpushader); - return nullptr; - } - break; - default: - // Implicit unsupported stages or weird multi-bit stage enum values - NBL_LOG_ERROR("Unknown Shader Stage %d", shaderStage); - return nullptr; - break; } - core::smart_refctd_ptr spirvShader; - if (creationParams.cpushader->getContentType() == asset::ICPUShader::E_CONTENT_TYPE::ECT_SPIRV) + core::smart_refctd_ptr spirvShader; + const auto sourceContent = source->getContentType(); + if (sourceContent==asset::IShader::E_CONTENT_TYPE::ECT_SPIRV) { if (creationParams.optimizer) { - spirvShader = core::make_smart_refctd_ptr( - std::move(creationParams.optimizer->optimize(creationParams.cpushader->getContent(), m_logger)), - shaderStage, asset::ICPUShader::E_CONTENT_TYPE::ECT_SPIRV, - std::string(creationParams.cpushader->getFilepathHint())); + spirvShader = 
core::make_smart_refctd_ptr( + std::move(creationParams.optimizer->optimize(source->getContent(), m_logger)), + asset::IShader::E_CONTENT_TYPE::ECT_SPIRV, + std::string(source->getFilepathHint())); } else - { - spirvShader = asset::IAsset::castDown(creationParams.cpushader->clone()); - } + spirvShader = asset::IAsset::castDown(source->clone(0)); } else { - auto compiler = m_compilerSet->getShaderCompiler(creationParams.cpushader->getContentType()); + auto compiler = m_compilerSet->getShaderCompiler(sourceContent); asset::IShaderCompiler::SCompilerOptions commonCompileOptions = {}; - commonCompileOptions.preprocessorOptions.logger = m_physicalDevice->getDebugCallback() ? m_physicalDevice->getDebugCallback()->getLogger() : nullptr; + commonCompileOptions.preprocessorOptions.logger = m_physicalDevice->getDebugCallback() ? m_physicalDevice->getDebugCallback()->getLogger():nullptr; commonCompileOptions.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); // to resolve includes before compilation - commonCompileOptions.preprocessorOptions.sourceIdentifier = creationParams.cpushader->getFilepathHint().c_str(); - commonCompileOptions.preprocessorOptions.extraDefines = {}; + commonCompileOptions.preprocessorOptions.sourceIdentifier = source->getFilepathHint().c_str(); + commonCompileOptions.preprocessorOptions.extraDefines = creationParams.extraDefines; - commonCompileOptions.stage = shaderStage; + commonCompileOptions.stage = creationParams.stage; commonCompileOptions.debugInfoFlags = asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT | asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT; @@ -373,15 +358,13 @@ core::smart_refctd_ptr ILogicalDevice::compileShader(const SS commonCompileOptions.readCache = creationParams.readCache; commonCompileOptions.writeCache = creationParams.writeCache; - if (creationParams.cpushader->getContentType() == asset::ICPUShader::E_CONTENT_TYPE::ECT_HLSL) + if 
(sourceContent==asset::IShader::E_CONTENT_TYPE::ECT_HLSL) { // TODO: add specific HLSLCompiler::SOption params - spirvShader = m_compilerSet->compileToSPIRV(creationParams.cpushader, commonCompileOptions); - } - else if (creationParams.cpushader->getContentType() == asset::ICPUShader::E_CONTENT_TYPE::ECT_GLSL) - { - spirvShader = m_compilerSet->compileToSPIRV(creationParams.cpushader, commonCompileOptions); + spirvShader = m_compilerSet->compileToSPIRV(source,commonCompileOptions); } + else if (sourceContent==asset::IShader::E_CONTENT_TYPE::ECT_GLSL) + spirvShader = m_compilerSet->compileToSPIRV(source,commonCompileOptions); else { NBL_LOG_ERROR("Unknown shader content type"); @@ -391,14 +374,14 @@ core::smart_refctd_ptr ILogicalDevice::compileShader(const SS if (!spirvShader) { - NBL_LOG_ERROR("SPIR-V Compilation from non SPIR-V shader %p failed", creationParams.cpushader); + NBL_LOG_ERROR("SPIR-V Compilation from non SPIR-V shader %p failed", source); return nullptr; } auto spirv = spirvShader->getContent(); if (!spirv) { - NBL_LOG_ERROR("SPIR-V Compilation from non SPIR-V shader %p failed", creationParams.cpushader); + NBL_LOG_ERROR("SPIR-V Compilation from non SPIR-V shader %p failed, no content", source); return nullptr; } @@ -406,7 +389,7 @@ core::smart_refctd_ptr ILogicalDevice::compileShader(const SS if constexpr (true) { system::ISystem::future_t> future; - m_physicalDevice->getSystem()->createFile(future, system::path(creationParams.cpushader->getFilepathHint()).parent_path() / "compiled.spv", system::IFileBase::ECF_WRITE); + m_physicalDevice->getSystem()->createFile(future, system::path(source->getFilepathHint()).parent_path()/"compiled.spv", system::IFileBase::ECF_WRITE); if (auto file = future.acquire(); file && bool(*file)) { system::IFile::success_t succ; @@ -418,24 +401,6 @@ core::smart_refctd_ptr ILogicalDevice::compileShader(const SS return spirvShader; } -core::smart_refctd_ptr ILogicalDevice::createShader(const SShaderCreationParameters& 
creationParams) -{ - auto cpuShader = compileShader(creationParams); - if (!cpuShader) - return nullptr; - - auto shader = createShader_impl(cpuShader.get()); - const auto path = creationParams.cpushader->getFilepathHint(); - if (shader && !path.empty()) - shader->setObjectDebugName(path.c_str()); - return shader; -} - -core::smart_refctd_ptr ILogicalDevice::createShader(const asset::ICPUShader* cpushader, const asset::ISPIRVOptimizer* optimizer) -{ - return ILogicalDevice::createShader({ cpushader, optimizer, nullptr }); -} - core::smart_refctd_ptr ILogicalDevice::createDescriptorSetLayout(const std::span bindings) { // TODO: MORE VALIDATION, but after descriptor indexing. @@ -813,6 +778,55 @@ asset::ICPUPipelineCache::SCacheKey ILogicalDevice::getPipelineCacheKey() const return key; } +bool ILogicalDevice::createComputePipelines(IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output) +{ + std::fill_n(output,params.size(),nullptr); + IGPUComputePipeline::SCreationParams::SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params,[this](const asset::IPipelineBase::SShaderSpecInfo& info)->bool + { + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-pNext-02755 + if (info.requiredSubgroupSize>=asset::IPipelineBase::SShaderSpecInfo::SUBGROUP_SIZE::REQUIRE_4 && !getPhysicalDeviceLimits().requiredSubgroupSizeStages.hasFlags(info.stage)) + { + NBL_LOG_ERROR("Invalid shader stage"); + return false; + } + return true; + }); + if (!specConstantValidation) + { + NBL_LOG_ERROR("Invalid parameters were given"); + return false; + } + + core::vector newParams(params.begin(), params.end()); + const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) + { + return sum + param.getShaders().size(); + }); + core::vector> debloatedShaders; // vector to hold all 
the debloated shaders, so the pointer from the new ShaderSpecInfo is not dangling + debloatedShaders.reserve(shaderCount); + + for (auto ix = 0u; ix < params.size(); ix++) + { + const auto& ci = params[ix]; + debloatShaders(*m_spirvDebloater.get(), ci.getShaders(), debloatedShaders, &newParams[ix].shader, m_logger); + } + + createComputePipelines_impl(pipelineCache,newParams,output,specConstantValidation); + + bool retval = true; + for (auto i=0u; isetObjectDebugName(params[i].shader.shader->getFilepathHint().c_str()); + } + return retval; +} + bool ILogicalDevice::createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, const std::span params, @@ -821,11 +835,11 @@ bool ILogicalDevice::createGraphicsPipelines( { std::fill_n(output, params.size(), nullptr); IGPUGraphicsPipeline::SCreationParams::SSpecializationValidationResult specConstantValidation = commonCreatePipelines(nullptr, params, - [this](const IGPUShader::SSpecInfo& info)->bool + [this](const asset::IPipelineBase::SShaderSpecInfo& info)->bool { - if (!info.shader) - return false; - return info.shader->wasCreatedBy(this); + if (info.stage != hlsl::ShaderStage::ESS_VERTEX) + return true; + return info.shader; } ); if (!specConstantValidation) @@ -836,6 +850,17 @@ bool ILogicalDevice::createGraphicsPipelines( const auto& features = getEnabledFeatures(); const auto& limits = getPhysicalDeviceLimits(); + core::vector newParams(params.begin(), params.end()); + const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) + { + return sum + param.getShaders().size(); + }); + core::vector> debloatedShaders; // vector to hold all the debloated shaders, so the pointer from the new ShaderSpecInfo is not dangling + debloatedShaders.reserve(shaderCount); + + core::vector debloatedShaderSpecs(shaderCount); + auto outShaderSpecs = debloatedShaderSpecs.data(); + for (auto ix = 0u; ix < params.size(); ix++) { const auto& ci = params[ix]; @@ -928,15 +953,25 @@ bool 
ILogicalDevice::createGraphicsPipelines( } } } + + newParams[ix].shaders = std::span(outShaderSpecs, ci.getShaders().size()); + debloatShaders(*m_spirvDebloater.get(), ci.getShaders(), debloatedShaders, outShaderSpecs, m_logger); } - createGraphicsPipelines_impl(pipelineCache, params, output, specConstantValidation); - for (auto i = 0u; i < params.size(); i++) + createGraphicsPipelines_impl(pipelineCache, newParams, output, specConstantValidation); + + for (auto i=0u; i* const output) { std::fill_n(output,params.size(),nullptr); - IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params,[this](const IGPUShader::SSpecInfo& info)->bool + IGPURayTracingPipeline::SCreationParams::SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params,[this](const asset::IPipelineBase::SShaderSpecInfo& info)->bool { - if (!info.shader->wasCreatedBy(this)) - { - NBL_LOG_ERROR("The shader was not created by this device"); - return false; - } return true; }); if (!specConstantValidation) @@ -997,9 +1027,23 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } + core::vector newParams(params.begin(), params.end()); + const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) + { + return sum + param.getShaders().size(); + }); + core::vector> debloatedShaders; // vector to hold all the debloated shaders, so the pointer from the new ShaderSpecInfo is not dangling + debloatedShaders.reserve(shaderCount); + + core::vector debloatedShaderSpecs(shaderCount); + auto outShaderSpecs = debloatedShaderSpecs.data(); + const auto& limits = getPhysicalDeviceLimits(); - for (const auto& param : params) + for (auto ix = 0u; ix < params.size(); ix++) { + + const auto& param = params[ix]; + // 
https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-maxPipelineRayRecursionDepth-03589 if (param.cached.maxRecursionDepth > limits.maxRayRecursionDepth) { @@ -1011,9 +1055,12 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline NBL_LOG_ERROR("Pipeline must have at least one shader."); return false; } + + newParams[ix].shaders = std::span(outShaderSpecs, param.getShaders().size()); + debloatShaders(*m_spirvDebloater.get(), param.getShaders(), debloatedShaders, outShaderSpecs, m_logger); } - createRayTracingPipelines_impl(pipelineCache,params,output,specConstantValidation); + createRayTracingPipelines_impl(pipelineCache, newParams,output,specConstantValidation); bool retval = true; for (auto i=0u; i::valid(const ILogicalDevice* dev return true; } - -CAssetConverter::patch_impl_t::patch_impl_t(const ICPUShader* shader) : stage(shader->getStage()) {} -bool CAssetConverter::patch_impl_t::valid(const ILogicalDevice* device) -{ - const auto& features = device->getEnabledFeatures(); - switch (stage) - { - // supported always - case IGPUShader::E_SHADER_STAGE::ESS_VERTEX: - case IGPUShader::E_SHADER_STAGE::ESS_FRAGMENT: - case IGPUShader::E_SHADER_STAGE::ESS_COMPUTE: - return true; - case IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL: - case IGPUShader::E_SHADER_STAGE::ESS_TESSELLATION_EVALUATION: - if (features.tessellationShader) - return true; - break; - case IGPUShader::E_SHADER_STAGE::ESS_GEOMETRY: - if (features.geometryShader) - return true; - break; - case IGPUShader::E_SHADER_STAGE::ESS_TASK: -// if (features.taskShader) -// return true; - break; - case IGPUShader::E_SHADER_STAGE::ESS_MESH: -// if (features.meshShader) -// return true; - break; - case IGPUShader::E_SHADER_STAGE::ESS_RAYGEN: - case IGPUShader::E_SHADER_STAGE::ESS_ANY_HIT: - case IGPUShader::E_SHADER_STAGE::ESS_CLOSEST_HIT: - case IGPUShader::E_SHADER_STAGE::ESS_MISS: - case 
IGPUShader::E_SHADER_STAGE::ESS_INTERSECTION: - case IGPUShader::E_SHADER_STAGE::ESS_CALLABLE: - if (features.rayTracingPipeline) - return true; - break; - default: - break; - } - return false; -} - CAssetConverter::patch_impl_t::patch_impl_t(const ICPUBuffer* buffer) : usage(buffer->getUsageFlags()) {} bool CAssetConverter::patch_impl_t::valid(const ILogicalDevice* device) { @@ -253,6 +209,15 @@ void deduceMetaUsages(Patch& patch, const core::bitflag +static std::string_view getLoggingLabel(const AssetType& asset) +{ + if constexpr (std::same_as) + return asset.getFilepathHint(); + else + return asset.getObjectDebugName(); +} + CAssetConverter::patch_impl_t::patch_impl_t(const ICPUImage* image) { const auto& params = image->getCreationParameters(); @@ -555,12 +520,7 @@ class AssetVisitor : public CRTP return false; const auto& specInfo = asset->getSpecInfo(); const auto* shader = specInfo.shader; - if (!shader) - return false; - CAssetConverter::patch_t patch = {shader}; - constexpr auto stage = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE; - patch.stage = stage; - if (!descend(shader,std::move(patch),stage,specInfo)) + if (!shader || !descend(shader,{shader},specInfo)) return false; return true; } @@ -573,7 +533,7 @@ class AssetVisitor : public CRTP const auto* rpass = asset->getRenderpass(); if (!rpass || !descend(rpass,{rpass})) return false; - using stage_t = ICPUShader::E_SHADER_STAGE; + using stage_t = hlsl::ShaderStage; for (stage_t stage : {stage_t::ESS_VERTEX,stage_t::ESS_TESSELLATION_CONTROL,stage_t::ESS_TESSELLATION_EVALUATION,stage_t::ESS_GEOMETRY,stage_t::ESS_FRAGMENT}) { const auto& specInfo = asset->getSpecInfo(stage); @@ -582,12 +542,10 @@ class AssetVisitor : public CRTP { if (stage==stage_t::ESS_VERTEX) // required return false; - CRTP::template nullOptional(); + CRTP::template nullOptional(); continue; } - CAssetConverter::patch_t patch = {shader}; - patch.stage = stage; - if (!descend(shader,std::move(patch),stage,specInfo)) + if 
(!descend(shader,{shader},specInfo)) return false; } return true; @@ -924,8 +882,8 @@ class DFSVisitor return {}; } // special checks (normally the GPU object creation will fail, but these are common pitfall paths, so issue errors earlier for select problems) - if constexpr (std::is_same_v) - if (dep.asset->getContentType()==ICPUShader::E_CONTENT_TYPE::ECT_GLSL) + if constexpr (std::is_same_v) + if (dep.asset->getContentType()==IShader::E_CONTENT_TYPE::ECT_GLSL) { inputs.logger.log("Asset Converter doesn't support converting GLSL shaders! Asset %p won't be converted (GLSL is deprecated in Nabla)",system::ILogger::ELL_ERROR,dep.asset); return {}; @@ -1029,7 +987,6 @@ class PatchOverride final : public CAssetConverter::CHashCache::IPatchOverride inline const patch_t* operator()(const lookup_t& lookup) const override {return impl(lookup);} - inline const patch_t* operator()(const lookup_t& lookup) const override {return impl(lookup);} inline const patch_t* operator()(const lookup_t& lookup) const override {return impl(lookup);} inline const patch_t* operator()(const lookup_t& lookup) const override {return impl(lookup);} inline const patch_t* operator()(const lookup_t& lookup) const override {return impl(lookup);} @@ -1077,27 +1034,27 @@ class HashVisit : public CAssetConverter::CHashCache::hash_impl_base { auto argTuple = std::tuple(extraArgs...); const auto& arg0 = std::get<0>(argTuple); - if constexpr (sizeof...(extraArgs)>1) + // hash the spec info + if constexpr (std::is_same_v) { - const auto& arg1 = std::get<1>(argTuple); - // hash the spec info - if constexpr (std::is_same_v) + hasher << arg0.entryPoint; + hasher << arg0.stage; + hasher << arg0.requiredSubgroupSize; + switch (arg0.stage) { - hasher << arg1.entryPoint; - for (const auto& specConstant : *arg1.entries) - { + case hlsl::ShaderStage::ESS_COMPUTE: + hasher << arg0.requireFullSubgroups; + break; + default: + break; + } + if (arg0.entries) + { + for (const auto& specConstant : *arg0.entries) + { 
hasher << specConstant.first; - hasher.update(specConstant.second.data,specConstant.second.size); - } - hasher << arg1.requiredSubgroupSize; - switch (arg0) - { - case IShader::E_SHADER_STAGE::ESS_COMPUTE: - hasher << arg1.requireFullSubgroups; - break; - default: - break; - } + hasher.update(specConstant.second.data, specConstant.second.size); + } } } } @@ -1118,15 +1075,14 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lo hasher.update(&patchedParams,sizeof(patchedParams)); return true; } -bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) +bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) { const auto* asset = lookup.asset; - hasher << lookup.patch->stage; const auto type = asset->getContentType(); hasher << type; // if not SPIR-V then own path matters - if (type!=ICPUShader::E_CONTENT_TYPE::ECT_SPIRV) + if (type!=IShader::E_CONTENT_TYPE::ECT_SPIRV) hasher << asset->getFilepathHint(); const auto* content = asset->getContent(); if (!content || content->getContentHash()==NoContentHash) @@ -1605,9 +1561,9 @@ void CAssetConverter::CHashCache::eraseStale(const IPatchOverride* patchOverride // only once all the descriptor types have been hashed, we can hash sets rehash.template operator()(); // naturally any pipeline depends on shaders and pipeline cache - rehash.template operator()(); - rehash.template operator()(); - rehash.template operator()(); + rehash.operator()(); + rehash.operator()(); + rehash.operator()(); // graphics pipeline needs a renderpass rehash.template operator()(); rehash.template operator()(); @@ -1771,7 +1727,7 @@ class GetDependantVisit : public GetDependantVisitBase::value*/sizeof(IShader::E_SHADER_STAGE)*8> specInfo = {}; + std::array::value*/sizeof(IShader::E_SHADER_STAGE)*8> specInfo = {}; protected: bool descend_impl( @@ -1787,19 +1743,19 @@ class GetDependantVisit : public GetDependantVisitBase& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const 
CAssetConverter::patch_t& soloPatch, - const IShader::E_SHADER_STAGE stage, const IShader::SSpecInfo& inSpecInfo + const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const IPipelineBase::SShaderSpecInfo& inSpecInfo ) { - auto depObj = getDependant(dep,soloPatch); + auto depObj = getDependant(dep,soloPatch); if (!depObj) return false; - getSpecInfo(stage) = { - .entryPoint = inSpecInfo.entryPoint, + getSpecInfo(inSpecInfo.stage) = { .shader = depObj.get(), - .entries = inSpecInfo.entries, + .entryPoint = inSpecInfo.entryPoint, // warning: its a `string_view` now! + .stage = inSpecInfo.stage, .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, - .requireFullSubgroups = inSpecInfo.requireFullSubgroups + .requireFullSubgroups = inSpecInfo.requireFullSubgroups, + .entries = inSpecInfo.entries }; return true; } @@ -1819,7 +1775,7 @@ class GetDependantVisit : public GetDependantVisitBase::value*/sizeof(IShader::E_SHADER_STAGE)*8> specInfo = {}; + std::array::value*/sizeof(IShader::E_SHADER_STAGE)*8> specInfo = {}; // optionals (done this way because inheritance chain with templated class hides protected methods) IGPURenderpass* renderpass = nullptr; @@ -1837,19 +1793,19 @@ class GetDependantVisit : public GetDependantVisitBase& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const CAssetConverter::patch_t& soloPatch, - const IShader::E_SHADER_STAGE stage, const IShader::SSpecInfo& inSpecInfo + const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const IPipelineBase::SShaderSpecInfo& inSpecInfo ) { - auto depObj = getDependant(dep,soloPatch); + auto depObj = getDependant(dep,soloPatch); if (!depObj) return false; - getSpecInfo(stage) = { - .entryPoint = inSpecInfo.entryPoint, + getSpecInfo(inSpecInfo.stage) = { .shader = depObj.get(), - .entries = inSpecInfo.entries, + .entryPoint = inSpecInfo.entryPoint, // warning: its a `string_view` now! 
+ .stage = inSpecInfo.stage, .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, - .requireFullSubgroups = 0 + .requireFullSubgroups = 0, + .entries = inSpecInfo.entries }; return true; } @@ -2991,18 +2947,25 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult } } } - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) { ILogicalDevice::SShaderCreationParameters createParams = { .optimizer = m_params.optimizer.get(), .readCache = inputs.readShaderCache, .writeCache = inputs.writeShaderCache }; + + // no one depend on the converted IShaders so we need to hold a smart ptr into them somewhere. + // This is to prevent m_stagingCache to hold a dangling pointer into IShader + retval.m_shaders.reserve(gpuObjUniqueCopyGroupIDs.size()); + for (auto& entry : conversionRequests) for (auto i=0ull; icreateShader(createParams)); + createParams.source = entry.second.canonicalAsset; + auto shader = device->compileShader(createParams); + retval.m_shaders.push_back(shader); + assign(entry.first,entry.second.firstCopyIx,i,std::move(shader)); } } if constexpr (std::is_same_v) @@ -3084,7 +3047,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult { pcRanges.clear(); asset::SPushConstantRange prev = { - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_UNKNOWN, + .stageFlags = hlsl::ShaderStage::ESS_UNKNOWN, .offset = 0, .size = 0 }; @@ -3185,7 +3148,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult } if constexpr (std::is_same_v) { - core::vector tmpSpecInfo; + core::vector tmpSpecInfo; tmpSpecInfo.reserve(5); for (auto& entry : conversionRequests) { @@ -3213,7 +3176,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult params.renderpass = visitor.renderpass; // while there are patches possible for shaders, the only patch which can happen here is changing a stage from UNKNOWN to match the slot here tmpSpecInfo.clear(); - using stage_t = ICPUShader::E_SHADER_STAGE; + using stage_t = 
hlsl::ShaderStage; for (stage_t stage : {stage_t::ESS_VERTEX,stage_t::ESS_TESSELLATION_CONTROL,stage_t::ESS_TESSELLATION_EVALUATION,stage_t::ESS_GEOMETRY,stage_t::ESS_FRAGMENT}) { auto& info = visitor.getSpecInfo(stage); @@ -3327,7 +3290,10 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult for (const auto& byte : contentHash.data) debugName << uint32_t(byte) << " "; debugName << "for Group " << uniqueCopyGroupID; - gpuObj.get()->setObjectDebugName(debugName.str().c_str()); + + // IShader is ethereal not really a persistent gpu object + if constexpr (std::is_base_of_v) + gpuObj.get()->setObjectDebugName(debugName.str().c_str()); } // insert into staging cache stagingCache.emplace(gpuObj.get(),typename CCache::key_t(contentHash,uniqueCopyGroupID)); @@ -3354,6 +3320,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult // TODO: BLAS and TLAS requests } ); + }; // The order of these calls is super important to go BOTTOM UP in terms of hashing and conversion dependants. // Both so we can hash in O(Depth) and not O(Depth^2) but also so we have all the possible dependants ready. 
@@ -3439,19 +3406,19 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult if (retval.willDeviceASBuild()) retval.m_queueFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT; } + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); + dedupCreateProp.operator()(); +// dedupCreateProp.operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); -// dedupCreateProp.template operator()(); } // write out results @@ -3669,7 +3636,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul return const_cast(&found->second.value); }; // wipe gpu item in staging cache (this may drop it as well if it was made for only a root asset == no users) - core::unordered_map outputReverseMap; + core::unordered_map outputReverseMap; core::for_each_in_tuple(reservations.m_gpuObjects,[&outputReverseMap](const auto& gpuObjects)->void { uint32_t i = 0; @@ -3880,19 +3847,19 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul const uint32_t imageCount = imagesToUpload.size(); const uint32_t computeMultiBufferingCount = params.compute->scratchCommandBuffers.size(); const IGPUDescriptorSetLayout::SBinding bindings[3] = { - {.binding=0,.type=IDescriptor::E_TYPE::ET_SAMPLER,.createFlags=BindingFlags,.stageFlags=IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,.count=1,.immutableSamplers=&repeatSampler}, + 
{.binding=0,.type=IDescriptor::E_TYPE::ET_SAMPLER,.createFlags=BindingFlags,.stageFlags=hlsl::ShaderStage::ESS_COMPUTE,.count=1,.immutableSamplers=&repeatSampler}, { .binding = SrcMipBinding, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = BindingFlags, - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = hlsl::ShaderStage::ESS_COMPUTE, .count = std::min(std::max(computeMultiBufferingCount,params.sampledImageBindingCount),imageCount) }, { .binding = DstMipBinding, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = BindingFlags, - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = hlsl::ShaderStage::ESS_COMPUTE, .count = std::min(std::max(MaxMipLevelsPastBase*computeMultiBufferingCount,params.storageImageBindingCount),MaxMipLevelsPastBase*imageCount) } }; @@ -5034,7 +5001,8 @@ if (worstSize>minScratchSize) auto* pGpuObj = item.first; if (depsMissing) { - logger.log("GPU Obj %s not writing to final cache because conversion of a dependant failed!",system::ILogger::ELL_ERROR,pGpuObj->getObjectDebugName()); + const auto* hashAsU64 = reinterpret_cast(item.second.value.data); + logger.log("GPU Obj %s not writing to final cache because conversion of a dependant failed!", system::ILogger::ELL_ERROR, getLoggingLabel(*pGpuObj)); // wipe self, to let users know item.second.value = {}; continue; @@ -5064,7 +5032,7 @@ if (worstSize>minScratchSize) mergeCache.template operator()(); mergeCache.template operator()(); mergeCache.template operator()(); - mergeCache.template operator()(); + mergeCache.template operator()(); mergeCache.template operator()(); mergeCache.template operator()(); mergeCache.template operator()(); diff --git a/src/nbl/video/utilities/CComputeBlit.cpp b/src/nbl/video/utilities/CComputeBlit.cpp index f244e4b052..4c3bbaa03c 100644 --- a/src/nbl/video/utilities/CComputeBlit.cpp +++ b/src/nbl/video/utilities/CComputeBlit.cpp @@ -66,9 +66,8 @@ struct ConstevalParameters }(); auto createPipeline = 
[&limits,layout,&common](const char* mainPath)->smart_refctd_ptr { - auto shader = make_smart_refctd_ptr( + auto shader = make_smart_refctd_ptr( (common+"\n#include \""+mainPath+"\"\n").c_str(), - IShader::E_SHADER_STAGE::ESS_COMPUTE, IShader::E_CONTENT_TYPE::ECT_HLSL, mainPath ); @@ -82,7 +81,7 @@ struct ConstevalParameters params.layout = layout; params.shader.entryPoint = "main"; params.shader.shader = shader.get(); - params.shader.requiredSubgroupSize = static_cast(findMSB(limits.maxSubgroupSize)); + params.shader.requiredSubgroupSize = static_cast(findMSB(limits.maxSubgroupSize)); // needed for the prefix and reductions to work params.shader.requireFullSubgroups = true; return ICPUComputePipeline::create(params); diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 0b2e120029..f659b1adb4 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -6,6 +6,7 @@ #include #include +#include "nbl/asset/metadata/CHLSLMetadata.h" #include "nlohmann/json.hpp" using json = nlohmann::json; @@ -218,13 +219,13 @@ class ShaderCompiler final : public system::IApplicationFramework m_include_search_paths.emplace_back(m_arguments[i + 1]); } - auto shader = open_shader_file(file_to_compile); + auto [shader, shaderStage] = open_shader_file(file_to_compile); if (shader->getContentType() != IShader::E_CONTENT_TYPE::ECT_HLSL) { m_logger->log("Error. 
Loaded shader file content is not HLSL.", ILogger::ELL_ERROR); return false; } - auto compilation_result = compile_shader(shader.get(), file_to_compile); + auto compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile); // writie compiled shader to file as bytes if (compilation_result) @@ -285,11 +286,11 @@ class ShaderCompiler final : public system::IApplicationFramework private: - core::smart_refctd_ptr compile_shader(const ICPUShader* shader, std::string_view sourceIdentifier) { + core::smart_refctd_ptr compile_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier) { smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); CHLSLCompiler::SOptions options = {}; - options.stage = shader->getStage(); + options.stage = shaderStage; options.preprocessorOptions.sourceIdentifier = sourceIdentifier; options.preprocessorOptions.logger = m_logger.get(); @@ -309,7 +310,7 @@ class ShaderCompiler final : public system::IApplicationFramework } - core::smart_refctd_ptr open_shader_file(std::string filepath) { + std::tuple, hlsl::ShaderStage> open_shader_file(std::string filepath) { m_assetMgr = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); @@ -318,9 +319,10 @@ class ShaderCompiler final : public system::IApplicationFramework lp.workingDirectory = localInputCWD; auto assetBundle = m_assetMgr->getAsset(filepath, lp); const auto assets = assetBundle.getContents(); + const auto* metadata = assetBundle.getMetadata(); if (assets.empty()) { m_logger->log("Could not load shader %s", ILogger::ELL_ERROR, filepath); - return nullptr; + return {nullptr, hlsl::ShaderStage::ESS_UNKNOWN}; } assert(assets.size() == 1); @@ -330,18 +332,19 @@ class ShaderCompiler final : public system::IApplicationFramework auto buf = IAsset::castDown(assets[0]); std::string source; source.resize(buf->getSize()+1); memcpy(source.data(),buf->getPointer(),buf->getSize()); - return 
core::make_smart_refctd_ptr(source.data(), IShader::E_SHADER_STAGE::ESS_UNKNOWN, IShader::E_CONTENT_TYPE::ECT_HLSL, std::move(filepath)); + return { core::make_smart_refctd_ptr(source.data(), IShader::E_CONTENT_TYPE::ECT_HLSL, std::move(filepath)), hlsl::ShaderStage::ESS_UNKNOWN}; } else if (assetBundle.getAssetType() == IAsset::ET_SHADER) { - return smart_refctd_ptr_static_cast(assets[0]); + const auto hlslMetadata = static_cast(metadata); + return { smart_refctd_ptr_static_cast(assets[0]), hlslMetadata->shaderStages->front()}; } else { m_logger->log("file '%s' is an asset that is neither a buffer or a shader.", ILogger::ELL_ERROR, filepath); } - return nullptr; + return {nullptr, hlsl::ShaderStage::ESS_UNKNOWN}; }