diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index e8f7dbd33..7fa72235b 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ -117,8 +117,8 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M // `CAsyncSingleBufferSubAllocator` just allows you suballocate subranges of any `IGPUBuffer` range with deferred/latched frees. constexpr uint32_t DownstreamBufferSize = sizeof(output_t)<<23; constexpr uint32_t UpstreamBufferSize = sizeof(input_t)<<23; - - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger),DownstreamBufferSize,UpstreamBufferSize); + + m_utils = IUtilities::create(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger),DownstreamBufferSize,UpstreamBufferSize); if (!m_utils) return logFail("Failed to create Utilities!"); m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); diff --git a/11_FFT/main.cpp b/11_FFT/main.cpp index 80f5f856c..b10efbf31 100644 --- a/11_FFT/main.cpp +++ b/11_FFT/main.cpp @@ -96,7 +96,7 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ constexpr uint32_t DownstreamBufferSize = sizeof(scalar_t) << 23; constexpr uint32_t UpstreamBufferSize = sizeof(scalar_t) << 23; - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); + m_utils = IUtilities::create(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); if (!m_utils) return logFail("Failed to create Utilities!"); m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); diff --git a/21_LRUCacheUnitTest/main.cpp b/21_LRUCacheUnitTest/main.cpp index 1c63fc744..467c6d4e4 100644 --- a/21_LRUCacheUnitTest/main.cpp +++ b/21_LRUCacheUnitTest/main.cpp @@ -5,6 +5,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, 
please open and read it. #include "nbl/application_templates/MonoSystemMonoLoggerApplication.hpp" +#include using namespace nbl; using namespace core; @@ -180,6 +181,38 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL cache3.insert(1, "bar"); cache3.clear(); + // Cache iterator test + constexpr uint32_t cache4Size = 10; + ResizableLRUCache cache4(cache4Size); + for (auto i = 0u; i < cache4Size; i++) + { + cache4.insert(i, i); + } + // Default iterator is MRU -> LRU + uint32_t counter = cache4Size - 1; + for (auto& pair : cache4) + { + assert(pair.first == counter && pair.second == counter); + counter--; + } + // Reverse LRU -> MRU traversal + counter = 0u; + for (auto it = cache4.crbegin(); it != cache4.crend(); it++) + { + assert(it->first == counter && it->second == counter); + counter++; + } + + // Cache copy test + ResizableLRUCache cache4Copy(cache4); + for (auto it = cache4.cbegin(), itCopy = cache4Copy.cbegin(); it != cache4.cend(); it++, itCopy++) + { + assert(*it == *itCopy); + // Assert deep copy + assert(it.operator->() != itCopy.operator->()); + + } + // Besides the disposal function that gets called when evicting, we need to check that the Cache properly destroys all resident `Key,Value` pairs when destroyed struct Foo { @@ -208,15 +241,13 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL int destroyCounter = 0; { - ResizableLRUCache cache4(10u); + ResizableLRUCache cache5(10u); for (int i = 0; i < 10; i++) - cache4.insert(i, Foo(&destroyCounter)); + cache5.insert(i, Foo(&destroyCounter)); int x = 0; } - assert(destroyCounter == 10); - m_logger->log("all good"); m_textureLRUCache = std::unique_ptr(new TextureLRUCache(1024u)); diff --git a/28_FFTBloom/main.cpp b/28_FFTBloom/main.cpp index cc312c3be..fddb45586 100644 --- a/28_FFTBloom/main.cpp +++ b/28_FFTBloom/main.cpp @@ -461,7 +461,7 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app 
assert(m_kerImageView); // Going to need an IUtils to perform uploads/downloads - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger)); + m_utils = IUtilities::create(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger)); // Now convert uploads // Get graphics queue for image transfer diff --git a/62_CAD/CTriangleMesh.cpp b/62_CAD/CTriangleMesh.cpp new file mode 100644 index 000000000..5564c0a51 --- /dev/null +++ b/62_CAD/CTriangleMesh.cpp @@ -0,0 +1 @@ +#include "CTriangleMesh.h" \ No newline at end of file diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h new file mode 100644 index 000000000..78f7dd99f --- /dev/null +++ b/62_CAD/CTriangleMesh.h @@ -0,0 +1,137 @@ +#pragma once + +#include +#include +#include "shaders/globals.hlsl" + +using namespace nbl; + +struct DTMHeightShadingSettingsInfo +{ + // Height Shading Mode + E_HEIGHT_SHADING_MODE heightShadingMode; + + // Used as fixed interval length for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + float intervalLength; + + // Converts an interval index to its corresponding height value + // For example, if this value is 10.0, then an interval index of 2 corresponds to a height of 20.0. + // This computed height is later used to determine the interpolated color for shading. + // It makes sense for this variable to be always equal to `intervalLength` but sometimes it's a different scaling so that last index corresponds to largestHeight + float intervalIndexToHeightMultiplier; + + // Used for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + // If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the + // first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. + // Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans + // [minHeight, minHeight + intervalLength]. 
+ bool isCenteredShading; + + void addHeightColorMapEntry(float height, float32_t4 color) + { + heightColorSet.emplace(height, color); + } + + bool fillShaderDTMSettingsHeightColorMap(DTMSettings& dtmSettings) const + { + const uint32_t mapSize = heightColorSet.size(); + if (mapSize > DTMHeightShadingSettings::HeightColorMapMaxEntries) + return false; + dtmSettings.heightShadingSettings.heightColorEntryCount = mapSize; + + int index = 0; + for (auto it = heightColorSet.begin(); it != heightColorSet.end(); ++it) + { + dtmSettings.heightShadingSettings.heightColorMapHeights[index] = it->height; + dtmSettings.heightShadingSettings.heightColorMapColors[index] = it->color; + ++index; + } + + return true; + } + +private: + struct HeightColor + { + float height; + float32_t4 color; + + bool operator<(const HeightColor& other) const + { + return height < other.height; + } + }; + + std::set heightColorSet; +}; + +struct DTMContourSettingsInfo +{ + LineStyleInfo lineStyleInfo; + + float startHeight; + float endHeight; + float heightInterval; +}; + +struct DTMSettingsInfo +{ + static constexpr uint32_t MaxContourSettings = DTMSettings::MaxContourSettings; + + uint32_t mode = 0u; // related to E_DTM_MODE + + // outline + LineStyleInfo outlineStyleInfo; + // contours + uint32_t contourSettingsCount = 0u; + DTMContourSettingsInfo contourSettings[MaxContourSettings]; + // height shading + DTMHeightShadingSettingsInfo heightShadingInfo; +}; + +class CTriangleMesh final +{ +public: + using index_t = uint32_t; + using vertex_t = TriangleMeshVertex; + + inline void setVertices(core::vector&& vertices) + { + m_vertices = std::move(vertices); + } + inline void setIndices(core::vector&& indices) + { + m_indices = std::move(indices); + } + + inline const core::vector& getVertices() const + { + return m_vertices; + } + inline const core::vector& getIndices() const + { + return m_indices; + } + + inline size_t getVertexBuffByteSize() const + { + return sizeof(vertex_t) * m_vertices.size(); 
+ } + inline size_t getIndexBuffByteSize() const + { + return sizeof(index_t) * m_indices.size(); + } + inline size_t getIndexCount() const + { + return m_indices.size(); + } + + inline void clear() + { + m_vertices.clear(); + m_indices.clear(); + } + + core::vector m_vertices; + core::vector m_indices; +}; \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 7cf96d693..b40f6585c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -3,10 +3,13 @@ DrawResourcesFiller::DrawResourcesFiller() {} -DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue) : - m_utilities(utils), - m_copyQueue(copyQueue) -{} +DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : + m_utilities(std::move(utils)), + m_copyQueue(copyQueue), + m_logger(std::move(logger)) +{ + imagesCache = std::unique_ptr(new ImagesCache(ImagesBindingArraySize)); +} // function is called when buffer is filled and we should submit draws and clear the buffers and continue filling @@ -15,116 +18,148 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) submitDraws = func; } -void DrawResourcesFiller::allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t maxIndices) +// DrawResourcesFiller needs to access these in order to allocate GPUImages and write them to their correct descriptor set binding +void DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding) +{ + imagesArrayBinding = binding; + suballocatedDescriptorSet = core::make_smart_refctd_ptr(std::move(descriptorSet)); +} + +bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize) { - maxIndexCount = maxIndices; - const size_t indexBufferSize = maxIndices * sizeof(index_buffer_type); - auto indexBuffer = 
ICPUBuffer::create({ indexBufferSize }); + // single memory allocation sectioned into images+buffers (images start at offset=0) + const size_t adjustedImagesMemorySize = core::alignUp(requiredImageMemorySize, GPUStructsMaxNaturalAlignment); + const size_t adjustedBuffersMemorySize = core::max(requiredBufferMemorySize, getMinimumRequiredResourcesBufferSize()); + const size_t totalResourcesSize = adjustedImagesMemorySize + adjustedBuffersMemorySize; + + IGPUBuffer::SCreationParams resourcesBufferCreationParams = {}; + resourcesBufferCreationParams.size = adjustedBuffersMemorySize; + resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; + resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams)); + resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); + + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs(); + const bool memoryRequirementsMatch = + (logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuBufferMemoryReqs.requiresDedicatedAllocation == false); // should not require dedicated allocation - index_buffer_type* indices = reinterpret_cast(indexBuffer->getPointer()); - for (uint32_t i = 0u; i < maxIndices / 6u; ++i) + if (!memoryRequirementsMatch) { - index_buffer_type objIndex = i; - indices[i * 6] = objIndex * 4u + 1u; - indices[i * 6 + 1u] = objIndex * 4u + 0u; - indices[i * 6 + 2u] = objIndex * 4u + 2u; - - indices[i * 6 + 3u] = objIndex * 4u + 1u; - indices[i * 6 + 4u] = objIndex * 4u + 2u; - indices[i * 6 + 5u] = objIndex * 4u + 3u; + m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't biind to device local 
memory.", nbl::system::ILogger::ELL_ERROR); + return false; } + + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); - IGPUBuffer::SCreationParams indexBufferCreationParams = {}; - indexBufferCreationParams.size = indexBufferSize; - indexBufferCreationParams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + uint32_t memoryTypeIdx = ~0u; - m_utilities->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue=m_copyQueue}, std::move(indexBufferCreationParams), indices).move_into(gpuDrawBuffers.indexBuffer); - gpuDrawBuffers.indexBuffer->setObjectDebugName("indexBuffer"); -} + video::IDeviceMemoryAllocator::SAllocation allocation = {}; + for (uint32_t i = 0u; i < memoryProperties.memoryTypeCount; ++i) + { + if (memoryProperties.memoryTypes[i].propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + { + memoryTypeIdx = i; -void DrawResourcesFiller::allocateMainObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t mainObjects) -{ - maxMainObjects = mainObjects; - size_t mainObjectsBufferSize = maxMainObjects * sizeof(MainObject); + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = + { + .size = totalResourcesSize, + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers + .memoryTypeIndex = memoryTypeIdx, + .dedication = nullptr, + }; + + allocation = logicalDevice->allocate(allocationInfo); + + if (allocation.isValid()) + break; + } + } - IGPUBuffer::SCreationParams mainObjectsCreationParams = {}; - mainObjectsCreationParams.size = mainObjectsBufferSize; - mainObjectsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.mainObjectsBuffer = logicalDevice->createBuffer(std::move(mainObjectsCreationParams)); - gpuDrawBuffers.mainObjectsBuffer->setObjectDebugName("mainObjectsBuffer"); + if (memoryTypeIdx == ~0u) + { + m_logger.log("allocateResourcesBuffer: no device local 
memory type found!", nbl::system::ILogger::ELL_ERROR); + return false; + } - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.mainObjectsBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto mainObjectsBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.mainObjectsBuffer.get()); + if (!allocation.isValid()) + return false; - cpuDrawBuffers.mainObjectsBuffer = ICPUBuffer::create({ mainObjectsBufferSize }); -} + imagesMemoryArena = { + .memory = allocation.memory, + .offset = allocation.offset, + }; -void DrawResourcesFiller::allocateDrawObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t drawObjects) -{ - maxDrawObjects = drawObjects; - size_t drawObjectsBufferSize = maxDrawObjects * sizeof(DrawObject); + buffersMemoryArena = { + .memory = allocation.memory, + .offset = core::alignUp(allocation.offset + adjustedImagesMemorySize, GPUStructsMaxNaturalAlignment), // first natural alignment after images section of the memory allocation + }; + + imagesMemorySubAllocator = core::make_smart_refctd_ptr(adjustedImagesMemorySize); - IGPUBuffer::SCreationParams drawObjectsCreationParams = {}; - drawObjectsCreationParams.size = drawObjectsBufferSize; - drawObjectsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.drawObjectsBuffer = logicalDevice->createBuffer(std::move(drawObjectsCreationParams)); - gpuDrawBuffers.drawObjectsBuffer->setObjectDebugName("drawObjectsBuffer"); + video::ILogicalDevice::SBindBufferMemoryInfo bindBufferMemory = { + .buffer = resourcesGPUBuffer.get(), + .binding = { + .memory = buffersMemoryArena.memory.get(), + .offset = buffersMemoryArena.offset, + } + }; - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.drawObjectsBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto drawObjectsBufferMem = 
logicalDevice->allocate(memReq, gpuDrawBuffers.drawObjectsBuffer.get()); + if (!logicalDevice->bindBufferMemory(1, &bindBufferMemory)) + { + m_logger.log("DrawResourcesFiller::allocateDrawResources, bindBufferMemory failed.", nbl::system::ILogger::ELL_ERROR); + return false; + } - cpuDrawBuffers.drawObjectsBuffer = ICPUBuffer::create({ drawObjectsBufferSize }); + return true; } -void DrawResourcesFiller::allocateGeometryBuffer(ILogicalDevice* logicalDevice, size_t size) +bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent, uint32_t maxTries) { - maxGeometryBufferSize = size; - - IGPUBuffer::SCreationParams geometryCreationParams = {}; - geometryCreationParams.size = size; - geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.geometryBuffer = logicalDevice->createBuffer(std::move(geometryCreationParams)); - gpuDrawBuffers.geometryBuffer->setObjectDebugName("geometryBuffer"); + const size_t minimumAcceptableSize = core::max(MinimumDrawResourcesMemorySize, getMinimumRequiredResourcesBufferSize()); - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.geometryBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto geometryBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.geometryBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - geometryBufferAddress = gpuDrawBuffers.geometryBuffer->getDeviceAddress(); + size_t currentBufferSize = maxBufferMemorySize; + size_t currentImageSize = maxImageMemorySize; + const size_t totalInitialSize = currentBufferSize + currentImageSize; - cpuDrawBuffers.geometryBuffer = ICPUBuffer::create({ size }); -} - -void DrawResourcesFiller::allocateStylesBuffer(ILogicalDevice* logicalDevice, 
uint32_t lineStylesCount) -{ + // If initial size is less than minimum acceptable then increase the buffer and image size to sum up to minimumAcceptableSize with image:buffer ratios preserved + if (totalInitialSize < minimumAcceptableSize) { - maxLineStyles = lineStylesCount; - size_t lineStylesBufferSize = lineStylesCount * sizeof(LineStyle); - - IGPUBuffer::SCreationParams lineStylesCreationParams = {}; - lineStylesCreationParams.size = lineStylesBufferSize; - lineStylesCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.lineStylesBuffer = logicalDevice->createBuffer(std::move(lineStylesCreationParams)); - gpuDrawBuffers.lineStylesBuffer->setObjectDebugName("lineStylesBuffer"); + // Preserve ratio: R = buffer / (buffer + image) + // scaleFactor = minimumAcceptableSize / totalInitialSize; + const double scaleFactor = static_cast(minimumAcceptableSize) / totalInitialSize; + currentBufferSize = static_cast(currentBufferSize * scaleFactor); + currentImageSize = minimumAcceptableSize - currentBufferSize; // ensures exact sum + } - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.lineStylesBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto stylesBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.lineStylesBuffer.get()); + uint32_t numTries = 0u; + while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) + { + if (allocateDrawResources(logicalDevice, currentImageSize, currentBufferSize)) + return true; - cpuDrawBuffers.lineStylesBuffer = ICPUBuffer::create({ lineStylesBufferSize }); + currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; + currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; + numTries++; + m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", 
system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); } + + m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize); + return false; } -void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) +bool DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) { - msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); - msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); - + // TODO: Make this function failable and report insufficient memory asset::E_FORMAT msdfFormat = MSDFTextureFormat; asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; - assert(maxMSDFs <= logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers); - + if (maxMSDFs > logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers) + { + m_logger.log("requested maxMSDFs is greater than maxImageArrayLayers. 
lowering the limit...", nbl::system::ILogger::ELL_WARNING); + maxMSDFs = logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers; + } + IPhysicalDevice::SImageFormatPromotionRequest promotionRequest = {}; promotionRequest.originalFormat = msdfFormat; promotionRequest.usages = {}; @@ -146,7 +181,10 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui auto image = logicalDevice->createImage(std::move(imgInfo)); auto imageMemReqs = image->getMemoryReqs(); imageMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - logicalDevice->allocate(imageMemReqs, image.get()); + const auto allocation = logicalDevice->allocate(imageMemReqs, image.get()); + + if (!allocation.isValid()) + return false; image->setObjectDebugName("MSDFs Texture Array"); @@ -163,6 +201,14 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui msdfTextureArray = logicalDevice->createImageView(std::move(imgViewInfo)); } + + if (!msdfTextureArray) + return false; + + msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); + msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); + msdfImagesState.resize(maxMSDFs); + return true; } void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit) @@ -170,18 +216,33 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line if (!lineStyleInfo.isVisible()) return; - uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyleInfo, intendedNextSubmit); + setActiveLineStyle(lineStyleInfo); + + beginMainObject(MainObjectType::POLYLINE, TransformationType::TT_NORMAL); + drawPolyline(polyline, intendedNextSubmit); + endMainObject(); +} - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); +void DrawResourcesFiller::drawFixedGeometryPolyline(const CPolylineBase& 
polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!lineStyleInfo.isVisible()) + return; - drawPolyline(polyline, mainObjIdx, intendedNextSubmit); + setActiveLineStyle(lineStyleInfo); + + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); + beginMainObject(MainObjectType::POLYLINE, transformationType); + drawPolyline(polyline, intendedNextSubmit); + endMainObject(); + popCustomProjection(); } -void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit) { - if (polylineMainObjIdx == InvalidMainObjectIdx) + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) { - // TODO: assert or log error here + m_logger.log("drawPolyline: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); assert(false); return; } @@ -194,7 +255,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p while (currentSectionIdx < sectionsCount) { const auto& currentSection = polyline.getSectionInfoAt(currentSectionIdx); - addPolylineObjects_Internal(polyline, currentSection, currentObjectInSection, polylineMainObjIdx); + addPolylineObjects_Internal(polyline, currentSection, currentObjectInSection, mainObjectIdx); if (currentObjectInSection >= currentSection.count) { @@ -202,7 +263,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p currentObjectInSection = 0u; } else - submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } if (!polyline.getConnectors().empty()) @@ -210,14 
+271,72 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p uint32_t currentConnectorPolylineObject = 0u; while (currentConnectorPolylineObject < polyline.getConnectors().size()) { - addPolylineConnectors_Internal(polyline, currentConnectorPolylineObject, polylineMainObjIdx); + addPolylineConnectors_Internal(polyline, currentConnectorPolylineObject, mainObjectIdx); if (currentConnectorPolylineObject < polyline.getConnectors().size()) - submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } } } +void DrawResourcesFiller::drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit) +{ + flushDrawObjects(); // flushes draw call construction of any possible draw objects before dtm, because currently we're separating dtm draw calls from drawObj draw calls + + setActiveDTMSettings(dtmSettingsInfo); + beginMainObject(MainObjectType::DTM); + + DrawCallData drawCallData = {}; + drawCallData.isDTMRendering = true; + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawTriangleMesh: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + drawCallData.dtm.triangleMeshMainObjectIndex = mainObjectIdx; + + ICPUBuffer::SCreationParams geometryBuffParams; + + // concatenate the index and vertex buffer into the geometry buffer + const size_t indexBuffByteSize = mesh.getIndexBuffByteSize(); + const size_t vtxBuffByteSize = mesh.getVertexBuffByteSize(); + const size_t dataToAddByteSize = vtxBuffByteSize + indexBuffByteSize; + + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + // TODO: assert of geometry buffer size, do i need to check if size of objects to be added <= remainingResourcesSize? 
+ // TODO: auto submit instead of assert + assert(dataToAddByteSize <= remainingResourcesSize); + + { + // NOTE[ERFAN]: these push constants will be removed, everything will be accessed by dtmSettings, including where the vertex buffer data resides + + // Copy VertexBuffer + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(dataToAddByteSize, alignof(CTriangleMesh::vertex_t)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + // the actual bda address will be determined only after all copies are finalized, later we will do += `baseBDAAddress + geometryInfo.bufferOffset` + drawCallData.dtm.triangleMeshVerticesBaseAddress = geometryBufferOffset; + memcpy(dst, mesh.getVertices().data(), vtxBuffByteSize); + geometryBufferOffset += vtxBuffByteSize; + + // Copy IndexBuffer + dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + drawCallData.dtm.indexBufferOffset = geometryBufferOffset; + memcpy(dst, mesh.getIndices().data(), indexBuffByteSize); + geometryBufferOffset += indexBuffByteSize; + } + + drawCallData.dtm.indexCount = mesh.getIndexCount(); + drawCalls.push_back(drawCallData); + endMainObject(); +} + // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor void DrawResourcesFiller::drawHatch( const Hatch& hatch, @@ -226,10 +345,8 @@ void DrawResourcesFiller::drawHatch( const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) { - // TODO[Optimization Idea]: don't draw hatch twice if both colors are visible: instead do the msdf inside the alpha resolve by detecting mainObj being a hatch - // https://discord.com/channels/593902898015109131/856835291712716820/1228337893366300743 - // TODO: Come back to this idea when doing color resolve for ecws (they don't have mainObj/style Index, instead they have uv into a texture - + // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat 
rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) + // if backgroundColor is visible drawHatch(hatch, backgroundColor, intendedNextSubmit); // if foregroundColor is visible @@ -241,38 +358,92 @@ void DrawResourcesFiller::drawHatch( const float32_t4& color, const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) +{ + drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit); +} + +void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit) +{ + drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) + + // if backgroundColor is visible + drawFixedGeometryHatch(hatch, backgroundColor, transformation, transformationType, intendedNextSubmit); + // if foregroundColor is visible + drawFixedGeometryHatch(hatch, foregroundColor, fillPattern, transformation, transformationType, intendedNextSubmit); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); + drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit, transformationType); + popCustomProjection(); +} + +void 
DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + drawFixedGeometryHatch(hatch, color, HatchFillPattern::SOLID_FILL, transformation, transformationType, intendedNextSubmit); +} + +void DrawResourcesFiller::drawHatch_impl( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit, + TransformationType transformationType) { if (color.a == 0.0f) // not visible return; - uint32_t textureIdx = InvalidTextureIdx; + uint32_t textureIdx = InvalidTextureIndex; if (fillPattern != HatchFillPattern::SOLID_FILL) { MSDFInputInfo msdfInfo = MSDFInputInfo(fillPattern); textureIdx = getMSDFIndexFromInputInfo(msdfInfo, intendedNextSubmit); - if (textureIdx == InvalidTextureIdx) - textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), InvalidMainObjectIdx, intendedNextSubmit); - _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIdx); // probably getHatchFillPatternMSDF returned nullptr + if (textureIdx == InvalidTextureIndex) + textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), intendedNextSubmit); + _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIndex); // probably getHatchFillPatternMSDF returned nullptr } LineStyleInfo lineStyle = {}; lineStyle.color = color; lineStyle.screenSpaceLineWidth = nbl::hlsl::bit_cast(textureIdx); - const uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); - uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject used in vertex shader. You can think of it as a Cage. 
+ setActiveLineStyle(lineStyle); + beginMainObject(MainObjectType::HATCH, transformationType); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject. You can think of it as a Cage. while (currentObjectInSection < hatch.getHatchBoxCount()) { - addHatch_Internal(hatch, currentObjectInSection, mainObjIdx); + addHatch_Internal(hatch, currentObjectInSection, mainObjectIdx); if (currentObjectInSection < hatch.getHatchBoxCount()) - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } -} -void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit) -{ - drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); + endMainObject(); } void DrawResourcesFiller::drawFontGlyph( @@ -282,194 +453,758 @@ void DrawResourcesFiller::drawFontGlyph( float32_t2 dirU, float32_t aspectRatio, float32_t2 minUV, - uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) { - uint32_t textureIdx = InvalidTextureIdx; + uint32_t textureIdx = InvalidTextureIndex; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); textureIdx = getMSDFIndexFromInputInfo(msdfInput, intendedNextSubmit); - if (textureIdx == InvalidTextureIdx) - textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), mainObjIdx, intendedNextSubmit); + if (textureIdx == InvalidTextureIndex) + textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), intendedNextSubmit); - if (textureIdx != InvalidTextureIdx) + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("drawFontGlyph: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + if 
(textureIdx != InvalidTextureIndex) { GlyphInfo glyphInfo = GlyphInfo(topLeft, dirU, aspectRatio, textureIdx, minUV); if (!addFontGlyph_Internal(glyphInfo, mainObjIdx)) { // single font glyph couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addFontGlyph_Internal(glyphInfo, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single GlyphInfo + const bool success = addFontGlyph_Internal(glyphInfo, mainObjIdx); + if (!success) + { + m_logger.log("addFontGlyph_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } } } else { - // TODO: Log, probably getGlyphMSDF(face,glyphIdx) returned nullptr ICPUImage ptr + m_logger.log("drawFontGlyph: textureIdx is invalid.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); } } -bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit) { - bool success = true; - success &= finalizeMainObjectCopiesToGPU(intendedNextSubmit); - success &= finalizeGeometryCopiesToGPU(intendedNextSubmit); - success &= finalizeLineStyleCopiesToGPU(intendedNextSubmit); - success &= finalizeTextureCopies(intendedNextSubmit); - return success; -} + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
+ auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + CachedImageRecord* cachedImageRecord = imagesCache->insert(staticImage.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN -uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit) -{ - uint32_t outLineStyleIdx = addLineStyle_Internal(lineStyle); - if (outLineStyleIdx == InvalidStyleIdx) + if (cachedImageRecord->arrayIndex != InvalidTextureIndex && staticImage.forceUpdate) { - finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); - resetGeometryCounters(); - resetMainObjectCounters(); - resetLineStyleCounters(); - outLineStyleIdx = addLineStyle_Internal(lineStyle); - assert(outLineStyleIdx != InvalidStyleIdx); + // found in cache, and we want to force new data into the image + if (cachedImageRecord->staticCPUImage) + { + const auto cachedImageParams = cachedImageRecord->staticCPUImage->getCreationParameters(); + const auto newImageParams = staticImage.cpuImage->getCreationParameters(); + const bool needsRecreation = newImageParams != cachedImageParams; + if (needsRecreation) + { + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena along with it's allocated array slot from the suballocated descriptor set + evictCallback(staticImage.imageID, *cachedImageRecord); + + // Instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + // imagesCache->erase(imageID); + // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + *cachedImageRecord = 
CachedImageRecord(currentFrameIndex); + } + else + { + // Doesn't need image recreation, we'll use the same array index in descriptor set + the same bound memory. + // reset it's state + update the cpu image used for copying. + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->staticCPUImage = staticImage.cpuImage; + } + } + else + { + m_logger.log("found static image has empty cpu image, shouldn't happen", nbl::system::ILogger::ELL_ERROR); + } } - return outLineStyleIdx; -} -uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, SIntendedSubmitInfo& intendedNextSubmit) -{ - MainObject mainObject = {}; - mainObject.styleIdx = styleIdx; - mainObject.clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - uint32_t outMainObjectIdx = addMainObject_Internal(mainObject); - if (outMainObjectIdx == InvalidMainObjectIdx) + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // in which case we don't queue anything for upload, and return the idx + if (cachedImageRecord->arrayIndex == InvalidTextureIndex) { - finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); + // This is a new image (cache miss). Allocate a descriptor index for it. + cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
+ suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - // geometries needs to be reset because they reference draw objects and draw objects reference main objects that are now unavailable and reset - resetGeometryCounters(); - // mainObjects needs to be reset because we submitted every previous main object - resetMainObjectCounters(); - // we shouldn't reset linestyles and clip projections here because it was possibly requested to push to mem before addMainObjects - // but clip projections are reset due to geometry/bda buffer being reset so we need to push again - - // acquireCurrentClipProjectionAddress again here because clip projection should exist in the geometry buffer, and reseting geometry counters will invalidate the current clip proj and requires repush - mainObject.clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - outMainObjectIdx = addMainObject_Internal(mainObject); - assert(outMainObjectIdx != InvalidMainObjectIdx); + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + { + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + IGPUImage::SCreationParams imageParams = {}; + imageParams = staticImage.cpuImage->getCreationParameters(); + imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) + }; + imageParams.format = physDev->promoteImageFormat(request,imageParams.tiling); + } + + // Attempt to create a GPU image and image view for this 
texture. + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, staticImage.imageViewFormatOverride, intendedNextSubmit, std::to_string(staticImage.imageID)); + + if (allocResults.isValid()) + { + cachedImageRecord->type = ImageType::STATIC; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = staticImage.cpuImage; + } + else + { + // All attempts to try create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. + m_logger.log("ensureStaticImageAvailability failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + { + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); + } + + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. + // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. + // No semaphore wait needed here, as the GPU never got to use this slot. 
+ suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } + + // erase the entry we failed to allocate an image for, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(staticImage.imageID); + } + } + else + { + m_logger.log("ensureStaticImageAvailability failed index allocation. shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } } - return outMainObjectIdx; -} + + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; -void DrawResourcesFiller::pushClipProjectionData(const ClipProjectionData& clipProjectionData) -{ - clipProjections.push_back(clipProjectionData); - clipProjectionAddresses.push_back(InvalidClipProjectionAddress); + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return cachedImageRecord->arrayIndex != InvalidTextureIndex; } -void DrawResourcesFiller::popClipProjectionData() +bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit) { - if (clipProjections.empty()) - return; + if (staticImages.size() > ImagesBindingArraySize) + return false; - clipProjections.pop_back(); - clipProjectionAddresses.pop_back(); + for (auto& staticImage : staticImages) + { + if (!ensureStaticImageAvailability(staticImage, intendedNextSubmit)) + return false; // failed ensuring a single staticImage is available, shouldn't happen unless the image is larger than the memory arena allocated for images. 
+ } + for (auto& staticImage : staticImages) + { + if (imagesCache->peek(staticImage.imageID) == nullptr) + return false; // this means one of the images evicted another, most likely due to VRAM limitations not all images can be resident all at once. + } + return true; } -bool DrawResourcesFiller::finalizeMainObjectCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) { - bool success = true; - // Copy MainObjects - uint32_t remainingMainObjects = currentMainObjectCount - inMemMainObjectCount; - SBufferRange mainObjectsRange = { sizeof(MainObject) * inMemMainObjectCount, sizeof(MainObject) * remainingMainObjects, gpuDrawBuffers.mainObjectsBuffer }; - if (mainObjectsRange.size > 0u) - { - const MainObject* srcMainObjData = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()) + inMemMainObjectCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, mainObjectsRange, srcMainObjData)) - inMemMainObjectCount = currentMainObjectCount; - else + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
+ auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + + // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE + IGPUImage::SCreationParams imageCreationParams = {}; + ImageType georeferenceImageType; + determineGeoreferencedImageCreationParams(imageCreationParams, georeferenceImageType, params); + + // imageParams = cpuImage->getCreationParameters(); + imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageCreationParams.usage) + }; + imageCreationParams.format = physDev->promoteImageFormat(request,imageCreationParams.tiling); + } + + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // But we need to check if the cached image needs resizing/recreation. + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // found in cache, but does it require resize? recreation? 
+ if (cachedImageRecord->gpuImageView) { - // TODO: Log - success = false; + auto imgViewParams = cachedImageRecord->gpuImageView->getCreationParameters(); + if (imgViewParams.image) + { + const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); + const auto cachedImageType = cachedImageRecord->type; + // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus + const auto currentParams = static_cast(imageCreationParams); + const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; + if (needsRecreation) + { + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. + evictCallback(imageID, *cachedImageRecord); + + // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + *cachedImageRecord = CachedImageRecord(currentFrameIndex); + // imagesCache->erase(imageID); + // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + } + } + else + { + m_logger.log("Cached georeferenced image has invalid gpu image.", nbl::system::ILogger::ELL_ERROR); + } } - } - return success; -} - -bool DrawResourcesFiller::finalizeGeometryCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) -{ - bool success = true; - // Copy DrawObjects - uint32_t remainingDrawObjects = currentDrawObjectCount - inMemDrawObjectCount; - SBufferRange drawObjectsRange = { sizeof(DrawObject) * inMemDrawObjectCount, sizeof(DrawObject) * remainingDrawObjects, gpuDrawBuffers.drawObjectsBuffer }; - if (drawObjectsRange.size > 0u) - { - const DrawObject* srcDrawObjData = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + inMemDrawObjectCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, drawObjectsRange, 
srcDrawObjData)) - inMemDrawObjectCount = currentDrawObjectCount; else { - // TODO: Log - success = false; + m_logger.log("Cached georeferenced image has invalid gpu image view.", nbl::system::ILogger::ELL_ERROR); } } - // Copy GeometryBuffer - uint64_t remainingGeometrySize = currentGeometryBufferSize - inMemGeometryBufferSize; - SBufferRange geomRange = { inMemGeometryBufferSize, remainingGeometrySize, gpuDrawBuffers.geometryBuffer }; - if (geomRange.size > 0u) + // in which case we don't queue anything for upload, and return the idx + if (cachedImageRecord->arrayIndex == InvalidTextureIndex) { - const uint8_t* srcGeomData = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + inMemGeometryBufferSize; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, geomRange, srcGeomData)) - inMemGeometryBufferSize = currentGeometryBufferSize; + // This is a new image (cache miss). Allocate a descriptor index for it. + cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + { + // Attempt to create a GPU image and image view for this texture. 
+ ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); + + if (allocResults.isValid()) + { + cachedImageRecord->type = georeferenceImageType; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = nullptr; + } + else + { + // All attempts to try create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. + + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + { + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); + } + + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. + // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. + // No semaphore wait needed here, as the GPU never got to use this slot. 
+ suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } + + // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(imageID); + } + } else { - // TODO: Log - success = false; + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed index allocation. shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - return success; + + + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; + + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return (cachedImageRecord->arrayIndex != InvalidTextureIndex); } -bool DrawResourcesFiller::finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy) { - bool success = true; - // Copy LineStyles - uint32_t remainingLineStyles = currentLineStylesCount - inMemLineStylesCount; - SBufferRange stylesRange = { sizeof(LineStyle) * inMemLineStylesCount, sizeof(LineStyle) * remainingLineStyles, gpuDrawBuffers.lineStylesBuffer }; - if (stylesRange.size > 0u) - { - const LineStyle* srcLineStylesData = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer()) + inMemLineStylesCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, stylesRange, srcLineStylesData)) - inMemLineStylesCount = currentLineStylesCount; - else + auto& vec = streamedImageCopies[imageID]; + vec.emplace_back(imageCopy); + return true; +} + +// 
TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo +// We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) +void DrawResourcesFiller::drawGridDTM( + const float64_t2& topLeft, + float64_t2 worldSpaceExtents, + float gridCellWidth, + uint64_t textureID, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit, + bool drawGridOnly/* = false*/) +{ + if (dtmSettingsInfo.mode == 0u) + return; + + if (dtmSettingsInfo.mode == E_DTM_MODE::OUTLINE) + drawGridOnly = true; + + GridDTMInfo gridDTMInfo; + gridDTMInfo.topLeft = topLeft; + gridDTMInfo.worldSpaceExtents = worldSpaceExtents; + gridDTMInfo.gridCellWidth = gridCellWidth; + if(!drawGridOnly) + gridDTMInfo.textureID = getImageIndexFromID(textureID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + + // determine the thickes line + float thickestLineThickness = 0.0f; + if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) + { + thickestLineThickness = dtmSettingsInfo.outlineStyleInfo.worldSpaceLineWidth + dtmSettingsInfo.outlineStyleInfo.screenSpaceLineWidth; + } + else if (dtmSettingsInfo.mode & E_DTM_MODE::CONTOUR && !drawGridOnly) + { + for (int i = 0; i < dtmSettingsInfo.contourSettingsCount; ++i) { - // TODO: Log - success = false; + const auto& contourLineStyle = dtmSettingsInfo.contourSettings[i].lineStyleInfo; + const float contourLineThickness = contourLineStyle.worldSpaceLineWidth + contourLineStyle.screenSpaceLineWidth; + thickestLineThickness = std::max(thickestLineThickness, contourLineThickness); } } 
- return success; + gridDTMInfo.thicknessOfTheThickestLine = thickestLineThickness; + + setActiveDTMSettings(dtmSettingsInfo, drawGridOnly); + beginMainObject(MainObjectType::GRID_DTM); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawGridDTM: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + if (!addGridDTM_Internal(gridDTMInfo, mainObjectIdx)) + { + // single grid DTM couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); + const bool success = addGridDTM_Internal(gridDTMInfo, mainObjectIdx); + if (!success) + { + m_logger.log("addGridDTM_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + + endMainObject(); } -bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit) { - msdfTextureArrayIndicesUsed.clear(); // clear msdf textures used in the frame, because the frame finished and called this function. 
+ beginMainObject(MainObjectType::STATIC_IMAGE); - if (!msdfTextureCopies.size() && m_hasInitializedMSDFTextureArrays) // even if the textureCopies are empty, we want to continue if not initialized yet so that the layout of all layers become READ_ONLY_OPTIMAL - return true; // yay successfully copied nothing + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("addImageObject: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } - auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - - if (cmdBuffInfo) + ImageObjectInfo info = {}; + info.topLeft = obb.topLeft; + info.dirU = obb.dirU; + info.aspectRatio = obb.aspectRatio; + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if (!addImageObject_Internal(info, mainObjIdx)) { - IGPUCommandBuffer* cmdBuff = cmdBuffInfo->cmdbuf; + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + const bool success = addImageObject_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addImageObject_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } - auto msdfImage = msdfTextureArray->getCreationParameters().image; + endMainObject(); +} - // preparing msdfs for copy - using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t; - image_barrier_t beforeTransferImageBarrier[] = +void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& 
intendedNextSubmit) +{ + beginMainObject(MainObjectType::STREAMED_IMAGE); + + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("addGeoreferencedImage: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + GeoreferencedImageInfo info = {}; + info.topLeft = params.worldspaceOBB.topLeft; + info.dirU = params.worldspaceOBB.dirU; + info.aspectRatio = params.worldspaceOBB.aspectRatio; + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) + { + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + const bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addGeoreferencedImageInfo_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + + endMainObject(); +} + +bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!intendedNextSubmit.valid()) + { + // It is a caching submit without command buffer, just for the purpose of accumulation of staging resources + // In that case we don't push any uploads (i.e. 
we don't record any imageRecord commmand in active command buffer, because there is no active command buffer) + return false; + } + + bool success = true; + if (currentReplayCache) + { + // This means we're in a replay cache scope, use the replay cache to push to GPU instead of internal accumulation + success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); + success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfImagesState); + + // Push Static Images Uploads from replay cache, all the work below is necessary to detect whether our image to replay is already in the cache in the exact form OR we need to create new image + bind memory and set array index + auto* device = m_utilities->getLogicalDevice(); + bool replayCacheFullyCovered = true; + for (auto& [imageID, toReplayRecord] : *currentReplayCache->imagesCache) + { + if (toReplayRecord.type != ImageType::STATIC) // non-static images (Georeferenced) won't be replayed like this + continue; + + auto* cachedRecord = imagesCache->peek(imageID); + bool alreadyResident = false; + + // compare with existing state, and check whether image id is already resident. + if (cachedRecord != nullptr) + { + const bool allocationMatches = + cachedRecord->allocationOffset == toReplayRecord.allocationOffset && + cachedRecord->allocationSize == toReplayRecord.allocationSize; + + const bool arrayIndexMatches = cachedRecord->arrayIndex == toReplayRecord.arrayIndex; + + alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state == ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; + } + + // if already resident, just update the state to the cached state (to make sure it doesn't get issued for upload again) and move on. 
+ if (alreadyResident) + { + toReplayRecord.state = cachedRecord->state; // update the toReplayImageRecords's state, to completely match the currently resident state + continue; + } + + replayCacheFullyCovered = false; + + bool successCreateNewImage = false; + + // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads + auto existingGPUImageViewParams = toReplayRecord.gpuImageView->getCreationParameters(); + IGPUImage::SCreationParams imageParams = {}; + imageParams = existingGPUImageViewParams.image->getCreationParameters(); + + auto newGPUImage = device->createImage(std::move(imageParams)); + if (newGPUImage) + { + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = newGPUImage.get(), + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } + }; + + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + newGPUImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); + IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; + viewParams.image = newGPUImage; + + auto newGPUImageView = device->createImageView(std::move(viewParams)); + if (newGPUImageView) + { + successCreateNewImage = true; + toReplayRecord.gpuImageView = newGPUImageView; + toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; + newGPUImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); + } + + } + } + + if (!successCreateNewImage) + { + m_logger.log("Couldn't create new gpu image in pushAllUploads: cache and replay mode.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + // Our actual `imageCache` (which represents GPU state) didn't cover the replayCache fully, so new images had to be 
created, bound to memory. and they need to be written into their respective descriptor array indices again. + // imagesCache = std::make_unique(*currentReplayCache->imagesCache); + imagesCache->clear(); + for (auto it = currentReplayCache->imagesCache->rbegin(); it != currentReplayCache->imagesCache->rend(); it++) + imagesCache->base_t::insert(it->first, it->second); + + if (!replayCacheFullyCovered) + { + // We need to block for previous submit in order to safely update the descriptor set array index next. + // + // [FUTURE_CONSIDERATION]: To avoid stalling the CPU when replaying caches that overflow GPU memory, + // we could recreate the image and image view, binding them to entirely new memory locations. + // This would require an indirection mechanism in the shader to remap references from cached geometry or objects to the new image array indices. + // Note: This isn't a problem if the replayed scene fits in memory and doesn't require overflow submissions due to image memory exhaustion. + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + device->blockForSemaphores({ &waitInfo, 1u }); + } + + success &= bindImagesToArrayIndices(*imagesCache); + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + // Streamed uploads in cache&replay?! 
+ } + else + { + flushDrawObjects(); + success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); + success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); + success &= bindImagesToArrayIndices(*imagesCache); + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + success &= pushStreamedImagesUploads(intendedNextSubmit); + } + return success; +} + +const DrawResourcesFiller::ResourcesCollection& DrawResourcesFiller::getResourcesCollection() const +{ + if (currentReplayCache) + return currentReplayCache->resourcesCollection; + else + return resourcesCollection; +} + +void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) +{ + activeLineStyle = lineStyle; + activeLineStyleIndex = InvalidStyleIdx; +} + +void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo, const bool disableHeightRelatedDTMModes/* = false*/) +{ + activeDTMSettings = dtmSettingsInfo; + activeDTMSettingsIndex = InvalidDTMSettingsIdx; + + if (disableHeightRelatedDTMModes) + activeDTMSettings.mode &= E_DTM_MODE::OUTLINE; +} + +void DrawResourcesFiller::beginMainObject(MainObjectType type, TransformationType transformationType) +{ + activeMainObjectType = type; + activeMainObjectTransformationType = transformationType; + activeMainObjectIndex = InvalidMainObjectIdx; +} + +void DrawResourcesFiller::endMainObject() +{ + activeMainObjectType = MainObjectType::NONE; + activeMainObjectTransformationType = TransformationType::TT_NORMAL; + activeMainObjectIndex = InvalidMainObjectIdx; +} + +void DrawResourcesFiller::pushCustomProjection(const float64_t3x3& projection) +{ + activeProjections.push_back(projection); + activeProjectionIndices.push_back(InvalidCustomProjectionIndex); +} + +void DrawResourcesFiller::popCustomProjection() +{ + if (activeProjections.empty()) + return; + + activeProjections.pop_back(); + activeProjectionIndices.pop_back(); +} + +void DrawResourcesFiller::pushCustomClipRect(const 
WorldClipRect& clipRect) +{ + activeClipRects.push_back(clipRect); + activeClipRectIndices.push_back(InvalidCustomClipRectIndex); +} + +void DrawResourcesFiller::popCustomClipRect() +{ if (activeClipRects.empty()) + return; + + activeClipRects.pop_back(); + activeClipRectIndices.pop_back(); +} + +/// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj +uint32_t DrawResourcesFiller::getActiveMainObjectIndex() const +{ + if (currentReplayCache) + return currentReplayCache->activeMainObjectIndex; + else + return activeMainObjectIndex; +} + +const std::vector& DrawResourcesFiller::getDrawCalls() const +{ + if (currentReplayCache) + return currentReplayCache->drawCallsData; + else + return drawCalls; +} + +std::unique_ptr DrawResourcesFiller::createReplayCache() +{ + flushDrawObjects(); + std::unique_ptr ret = std::unique_ptr(new ReplayCache); + ret->resourcesCollection = resourcesCollection; + ret->msdfImagesState = msdfImagesState; + for (auto& stagedMSDF : ret->msdfImagesState) + stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. 
+ ret->drawCallsData = drawCalls; + ret->activeMainObjectIndex = activeMainObjectIndex; + ret->imagesCache = std::unique_ptr(new ImagesCache(*imagesCache)); + return ret; +} + +void DrawResourcesFiller::setReplayCache(ReplayCache* cache) +{ + currentReplayCache = cache; +} + +void DrawResourcesFiller::unsetReplayCache() +{ + currentReplayCache = nullptr; +} + +bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources) +{ + copiedResourcesSize = 0ull; + + if (resourcesCollection.calculateTotalConsumption() > resourcesGPUBuffer->getSize()) + { + m_logger.log("some bug has caused the resourcesCollection to consume more memory than available in resourcesGPUBuffer without overflow submit", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool + { + // drawBuffer must be of type CPUGeneratedResource + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; + + if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) + { + m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `copyCPUFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + drawBuffer.bufferOffset = copyRange.offset; + if (copyRange.size > 0ull) + { + if (!m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) + return false; + copiedResourcesSize += drawBuffer.getAlignedStorageSize(); + } + return true; + }; + + auto addComputeReservedFilledDrawBuffer = [&](auto& drawBuffer) -> bool + { + // drawBuffer must be of type ReservedComputeResource + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; + + if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) + { + 
m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `addComputeReservedFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + drawBuffer.bufferOffset = copyRange.offset; + copiedResourcesSize += drawBuffer.getAlignedStorageSize(); + }; + + copyCPUFilledDrawBuffer(resources.lineStyles); + copyCPUFilledDrawBuffer(resources.dtmSettings); + copyCPUFilledDrawBuffer(resources.customProjections); + copyCPUFilledDrawBuffer(resources.customClipRects); + copyCPUFilledDrawBuffer(resources.mainObjects); + copyCPUFilledDrawBuffer(resources.drawObjects); + copyCPUFilledDrawBuffer(resources.indexBuffer); + copyCPUFilledDrawBuffer(resources.geometryInfo); + + return true; +} + +bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages) +{ + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); + + if (cmdBuffInfo) + { + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + + auto msdfImage = msdfTextureArray->getCreationParameters().image; + + // preparing msdfs for imageRecord + using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t; + image_barrier_t beforeTransferImageBarrier[] = { { .barrier = { @@ -493,25 +1228,24 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, } }; - cmdBuff->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeTransferImageBarrier }); + commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeTransferImageBarrier }); // Do the copies and advance the iterator. // this is the pattern we use for iterating when entries will get erased if processed successfully, but may get skipped for later. 
- auto oit = msdfTextureCopies.begin(); - for (auto iit = msdfTextureCopies.begin(); iit != msdfTextureCopies.end(); iit++) + for (uint32_t i = 0u; i < stagedMSDFCPUImages.size(); ++i) { - bool copySuccess = true; - if (iit->image && iit->index < msdfImage->getCreationParameters().arrayLayers) + auto& stagedMSDF = stagedMSDFCPUImages[i]; + if (stagedMSDF.image && i < msdfImage->getCreationParameters().arrayLayers) { - for (uint32_t mip = 0; mip < iit->image->getCreationParameters().mipLevels; mip++) + for (uint32_t mip = 0; mip < stagedMSDF.image->getCreationParameters().mipLevels; mip++) { - auto mipImageRegion = iit->image->getRegion(mip, core::vectorSIMDu32(0u, 0u)); + auto mipImageRegion = stagedMSDF.image->getRegion(mip, core::vectorSIMDu32(0u, 0u)); if (mipImageRegion) { asset::IImage::SBufferCopy region = {}; region.imageSubresource.aspectMask = asset::IImage::EAF_COLOR_BIT; region.imageSubresource.mipLevel = mipImageRegion->imageSubresource.mipLevel; - region.imageSubresource.baseArrayLayer = iit->index; + region.imageSubresource.baseArrayLayer = i; region.imageSubresource.layerCount = 1u; region.bufferOffset = 0u; region.bufferRowLength = mipImageRegion->getExtent().width; @@ -519,46 +1253,31 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex region.imageExtent = mipImageRegion->imageExtent; region.imageOffset = { 0u, 0u, 0u }; - auto buffer = reinterpret_cast(iit->image->getBuffer()->getPointer()); + auto buffer = reinterpret_cast(stagedMSDF.image->getBuffer()->getPointer()); auto bufferOffset = mipImageRegion->bufferOffset; - if (!m_utilities->updateImageViaStagingBuffer( + stagedMSDF.uploadedToGPU = m_utilities->updateImageViaStagingBuffer( intendedNextSubmit, buffer + bufferOffset, nbl::ext::TextRendering::TextRenderer::MSDFTextureFormat, msdfImage.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - { ®ion, ®ion + 1 })) - { - // TODO: Log which mip failed - copySuccess = false; - } + { ®ion, ®ion + 1 }); } else { - // TODO: 
Log - copySuccess = false; + assert(false); + stagedMSDF.uploadedToGPU = false; } } } else { - assert(false); - copySuccess = false; - } - - if (!copySuccess) - { - // we move the failed copy to the oit and advance it - if (oit != iit) - *oit = *iit; - oit++; + stagedMSDF.uploadedToGPU = false; } } - // trim - const auto newSize = std::distance(msdfTextureCopies.begin(), oit); - _NBL_DEBUG_BREAK_IF(newSize != 0u); // we had failed copies - msdfTextureCopies.resize(newSize); + + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change // preparing msdfs for use image_barrier_t afterTransferImageBarrier[] = @@ -585,8 +1304,8 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, } }; - cmdBuff->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterTransferImageBarrier }); - + commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterTransferImageBarrier }); + if (!m_hasInitializedMSDFTextureArrays) m_hasInitializedMSDFTextureArrays = true; @@ -594,136 +1313,588 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex } else { - // TODO: Log no valid command buffer to record into + m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `addComputeReservedFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); return false; } } -void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex) +bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) { - finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); + bool success = true; + + auto* device = m_utilities->getLogicalDevice(); + auto* descriptorSet = 
suballocatedDescriptorSet->getDescriptorSet(); + + // DescriptorSet Updates + std::vector descriptorInfos; + std::vector descriptorWrites; + descriptorInfos.resize(imagesCache.size()); + descriptorWrites.resize(imagesCache.size()); + + uint32_t descriptorWriteCount = 0u; + for (auto& [id, record] : imagesCache) + { + if (record.state >= ImageState::BOUND_TO_DESCRIPTOR_SET || !record.gpuImageView) + continue; + + // Bind gpu image view to descriptor set + video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo = {}; + descriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo.desc = record.gpuImageView; + descriptorInfos[descriptorWriteCount] = descriptorInfo; + + // consider batching contiguous writes, if descriptor set updating was a hotspot + IGPUDescriptorSet::SWriteDescriptorSet descriptorWrite = {}; + descriptorWrite.dstSet = descriptorSet; + descriptorWrite.binding = imagesArrayBinding; + descriptorWrite.arrayElement = record.arrayIndex; + descriptorWrite.count = 1u; + descriptorWrite.info = &descriptorInfos[descriptorWriteCount]; + descriptorWrites[descriptorWriteCount] = descriptorWrite; + + record.state = ImageState::BOUND_TO_DESCRIPTOR_SET; + descriptorWriteCount++; + } - // We reset Geometry Counters (drawObj+geometryInfos) because we're done rendering previous geometry - // We don't reset counters for styles because we will be reusing them - resetGeometryCounters(); + if (descriptorWriteCount > 0u) + success &= device->updateDescriptorSets(descriptorWriteCount, descriptorWrites.data(), 0u, nullptr); + return success; +} + +bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache) +{ + bool success = true; + + // Push Static Images Uploads, only those who are not gpu resident + // TODO: remove this vector and check state in each for loop below? 
+ std::vector nonResidentImageRecords; + for (auto& [id, record] : imagesCache) + { + if (record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA) + nonResidentImageRecords.push_back(&record); + } + + if (nonResidentImageRecords.size() > 0ull) + { + auto* device = m_utilities->getLogicalDevice(); + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); -#if 1 - if (mainObjectIndex < maxMainObjects) + if (cmdBuffInfo) + { + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.resize(nonResidentImageRecords.size()); + + // Pipeline Barriers before imageRecord + for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i) + { + auto& imageRecord = *nonResidentImageRecords[i]; + const auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; + beforeCopyImageBarriers[i] = + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // .ownershipOp. 
No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + }; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); + + for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i) + { + auto& imageRecord = *nonResidentImageRecords[i]; + auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; + success &= m_utilities->updateImageViaStagingBuffer( + intendedNextSubmit, + imageRecord.staticCPUImage->getBuffer()->getPointer(), imageRecord.staticCPUImage->getCreationParameters().format, + gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + imageRecord.staticCPUImage->getRegions()); + + if (success) + imageRecord.state = ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; + else + { + m_logger.log("Failed `updateImageViaStagingBuffer` in pushStaticImagesUploads.", nbl::system::ILogger::ELL_ERROR); + } + } + + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + + std::vector afterCopyImageBarriers; + afterCopyImageBarriers.resize(nonResidentImageRecords.size()); + + // Pipeline Barriers before imageRecord + for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i) + { + auto& imageRecord = *nonResidentImageRecords[i]; + const auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; + afterCopyImageBarriers[i] = + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = 
PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + } + else + { + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + if (!success) { - // Check if user is following proper usage, mainObjectIndex should be the last mainObj added before an autosubmit, because this is the only mainObj we want to maintain. - // See comments on`addMainObject_SubmitIfNeeded` function - // TODO: consider forcing this by not expose mainObjectIndex to user and keep track of a "currentMainObj" (?) 
- _NBL_DEBUG_BREAK_IF(mainObjectIndex != (currentMainObjectCount - 1u)); + m_logger.log("Failure in `pushStaticImagesUploads`.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + return success; +} - // If the clip projection stack is non-empty, then it means we need to re-push the clipProjectionData (because it existed in geometry data and it was erased) - uint64_t newClipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - // only re-upload mainObjData if it's clipProjectionAddress was changed - if (newClipProjectionAddress != getMainObject(mainObjectIndex)->clipProjectionAddress) +bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + bool success = true; + + if (streamedImageCopies.size() > 0ull) + { + auto* device = m_utilities->getLogicalDevice(); + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); + + if (cmdBuffInfo) { - // then modify the mainObject data - getMainObject(mainObjectIndex)->clipProjectionAddress = newClipProjectionAddress; - // we need to rewind back inMemMainObjectCount to this mainObjIndex so it re-uploads the current mainObject (because we modified it) - inMemMainObjectCount = core::min(inMemMainObjectCount, mainObjectIndex); + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers before imageCopy + for (auto& [imageID, imageCopies] : streamedImageCopies) + { + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + beforeCopyImageBarriers.push_back( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + 
.dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + }); + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); + + for (auto& [imageID, imageCopies] : streamedImageCopies) + { + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + for (auto& imageCopy : imageCopies) + { + success &= m_utilities->updateImageViaStagingBuffer( + intendedNextSubmit, + imageCopy.srcBuffer->getPointer(), imageCopy.srcFormat, + gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + { &imageCopy.region, 1u }); + } + } + + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + + std::vector afterCopyImageBarriers; + afterCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers before imageCopy + for (auto& [imageID, imageCopies] : streamedImageCopies) + { + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + afterCopyImageBarriers.push_back ( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = 
ACCESS_FLAGS::SHADER_READ_BITS, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }); + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + + streamedImageCopies.clear(); } + else + { + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + if (!success) + { + m_logger.log("Failure in `pushStreamedImagesUploads`.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + return success; +} + +const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const +{ + assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption()); + return resourcesGPUBuffer->getSize() - resourcesCollection.calculateTotalConsumption(); +} + +void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t& mainObjectIndex) +{ + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + mainObjectIndex = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); // it will be 0 because it's first mainObjectIndex after reset and invalidation +} + +uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineStyleInfo) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const bool enoughMem = remainingResourcesSize >= sizeof(LineStyle); // enough remaining memory for 1 more linestyle? + if (!enoughMem) + return InvalidStyleIdx; + // TODO: Maybe constraint by a max size? 
and return InvalidIdx if it would exceed + + LineStyle gpuLineStyle = lineStyleInfo.getAsGPUData(); + _NBL_DEBUG_BREAK_IF(gpuLineStyle.stipplePatternSize > LineStyle::StipplePatternMaxSize); // Oops, even after style normalization the style is too long to be in gpu mem :( + for (uint32_t i = 0u; i < resourcesCollection.lineStyles.vector.size(); ++i) + { + const LineStyle& itr = resourcesCollection.lineStyles.vector[i]; + if (itr == gpuLineStyle) + return i; + } + + return resourcesCollection.lineStyles.addAndGetOffset(gpuLineStyle); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers +} + +uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t noOfLineStylesRequired = ((dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) ? 1u : 0u) + dtmSettingsInfo.contourSettingsCount; + const size_t maxMemRequired = sizeof(DTMSettings) + noOfLineStylesRequired * sizeof(LineStyle); + const bool enoughMem = remainingResourcesSize >= maxMemRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? + + if (!enoughMem) + return InvalidDTMSettingsIdx; + // TODO: Maybe constraint by a max size? 
and return InvalidIdx if it would exceed + + DTMSettings dtmSettings; + + ////dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + + dtmSettings.mode = dtmSettingsInfo.mode; + if (dtmSettings.mode & E_DTM_MODE::HEIGHT_SHADING) + { + switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = std::numeric_limits::infinity(); + break; + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = dtmSettingsInfo.heightShadingInfo.intervalLength; + break; + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = 0.0f; + break; + } + dtmSettings.heightShadingSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier; + dtmSettings.heightShadingSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); + _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); } + if (dtmSettings.mode & E_DTM_MODE::CONTOUR) + { + dtmSettings.contourSettingsCount = dtmSettingsInfo.contourSettingsCount; + for (uint32_t i = 0u; i < dtmSettings.contourSettingsCount; ++i) + { + dtmSettings.contourSettings[i].contourLinesStartHeight = dtmSettingsInfo.contourSettings[i].startHeight; + dtmSettings.contourSettings[i].contourLinesEndHeight = dtmSettingsInfo.contourSettings[i].endHeight; + dtmSettings.contourSettings[i].contourLinesHeightInterval = dtmSettingsInfo.contourSettings[i].heightInterval; + dtmSettings.contourSettings[i].contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourSettings[i].lineStyleInfo); + } + } + if (dtmSettings.mode & E_DTM_MODE::OUTLINE) + { + dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineStyleInfo); + } + + for (uint32_t i = 0u; i 
< resourcesCollection.dtmSettings.vector.size(); ++i) + { + const DTMSettings& itr = resourcesCollection.dtmSettings.vector[i]; + if (itr == dtmSettings) + return i; + } + + return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers +} + +float64_t3x3 DrawResourcesFiller::getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const +{ + if (!activeProjections.empty()) + { + float64_t3x3 newTransformation = nbl::hlsl::mul(activeProjections.back(), transformation); + + if (transformationType == TransformationType::TT_NORMAL) + { + return newTransformation; + } + else if (transformationType == TransformationType::TT_FIXED_SCREENSPACE_SIZE) + { + // Extract normalized rotation columns + float64_t2 column0 = nbl::hlsl::normalize(float64_t2(newTransformation[0][0], newTransformation[1][0])); + float64_t2 column1 = nbl::hlsl::normalize(float64_t2(newTransformation[0][1], newTransformation[1][1])); + + // Extract fixed screen-space scale from the original transformation + float64_t2 fixedScale = float64_t2( + nbl::hlsl::length(float64_t2(transformation[0][0], transformation[1][0])), + nbl::hlsl::length(float64_t2(transformation[0][1], transformation[1][1]))); + + // Apply fixed scale to normalized directions + column0 *= fixedScale.x; + column1 *= fixedScale.y; + + // Compose final matrix with adjusted columns + newTransformation[0][0] = column0[0]; + newTransformation[1][0] = column0[1]; + newTransformation[0][1] = column1[0]; + newTransformation[1][1] = column1[1]; + + return newTransformation; + } + else + { + // Fallback if transformationType is unrecognized, shouldn't happen + return newTransformation; + } + } + else + { + // Within no active projection scope, return transformation directly + return transformation; + } +} + +uint32_t 
DrawResourcesFiller::acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeLineStyleIndex == InvalidStyleIdx) + activeLineStyleIndex = addLineStyle_SubmitIfNeeded(activeLineStyle, intendedNextSubmit); + + return activeLineStyleIndex; +} + +uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) + activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit); + + return activeDTMSettingsIndex; +} + +uint32_t DrawResourcesFiller::acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeProjectionIndices.empty()) + return InvalidCustomProjectionIndex; - // TODO: Consider resetting MainObjects here as well and addMainObject for the new data again, but account for the fact that mainObjectIndex now changed (either change through uint32_t& or keeping track of "currentMainObj" in drawResourcesFiller -#else - resetMainObjectCounters(); + if (activeProjectionIndices.back() == InvalidCustomProjectionIndex) + activeProjectionIndices.back() = addCustomProjection_SubmitIfNeeded(activeProjections.back(), intendedNextSubmit); + + return activeProjectionIndices.back(); +} - // If there is a mainObject data we need to maintain and keep it's clipProjectionAddr valid - if (mainObjectIndex < maxMainObjects) - { - MainObject mainObjToMaintain = *getMainObject(mainObjectIndex); +uint32_t DrawResourcesFiller::acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeClipRectIndices.empty()) + return InvalidCustomClipRectIndex; - // If the clip projection stack is non-empty, then it means we need to re-push the clipProjectionData (because it exists in geometry data and it was reset) - // `acquireCurrentClipProjectionAddress` shouldn't/won't trigger auto-submit because geometry buffer counters were reset 
and our geometry buffer is supposed to be larger than a single clipProjectionData - mainObjToMaintain->clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - - // We're calling `addMainObject_Internal` instead of safer `addMainObject_SubmitIfNeeded` because we've reset our mainObject and we're sure this won't need an autoSubmit. - addMainObject_Internal(mainObjToMaintain); - } -#endif + if (activeClipRectIndices.back() == InvalidCustomClipRectIndex) + activeClipRectIndices.back() = addCustomClipRect_SubmitIfNeeded(activeClipRects.back(), intendedNextSubmit); + + return activeClipRectIndices.back(); } -uint32_t DrawResourcesFiller::addMainObject_Internal(const MainObject& mainObject) +uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) { - MainObject* mainObjsArray = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()); - - if (currentMainObjectCount >= MaxIndexableMainObjects) - return InvalidMainObjectIdx; - if (currentMainObjectCount >= maxMainObjects) + if (activeMainObjectIndex != InvalidMainObjectIdx) + return activeMainObjectIndex; + if (activeMainObjectType == MainObjectType::NONE) + { + assert(false); // You're probably trying to acquire mainObjectIndex outside of startMainObject, endMainObject scope return InvalidMainObjectIdx; + } - void* dst = mainObjsArray + currentMainObjectCount; - memcpy(dst, &mainObject, sizeof(MainObject)); - uint32_t ret = currentMainObjectCount; - currentMainObjectCount++; - return ret; + const bool needsLineStyle = + (activeMainObjectType == MainObjectType::POLYLINE) || + (activeMainObjectType == MainObjectType::HATCH) || + (activeMainObjectType == MainObjectType::TEXT); + const bool needsDTMSettings = (activeMainObjectType == MainObjectType::DTM || activeMainObjectType == MainObjectType::GRID_DTM); + const bool needsCustomProjection = (!activeProjectionIndices.empty()); + const bool needsCustomClipRect = 
(!activeClipRectIndices.empty()); + + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + // making sure MainObject and everything it references fits into remaining resources mem + size_t memRequired = sizeof(MainObject); + if (needsLineStyle) memRequired += sizeof(LineStyle); + if (needsDTMSettings) memRequired += sizeof(DTMSettings); + if (needsCustomProjection) memRequired += sizeof(float64_t3x3); + if (needsCustomClipRect) memRequired += sizeof(WorldClipRect); + + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for the MainObject and everything it references? + const bool needToOverflowSubmit = (!enoughMem) || (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects); + + if (needToOverflowSubmit) + { + // failed to fit into remaining resources mem or exceeded max indexable mainobj + submitDraws(intendedNextSubmit); + reset(); // resets everything! be careful! + } + + MainObject mainObject = {}; + // These 4 calls below shouldn't need to Submit because we made sure there is enough memory for all of them. + // if something here triggers an auto-submit it's a possible bug with calculating `memRequired` above, TODO: assert that somehow? + mainObject.styleIdx = (needsLineStyle) ? acquireActiveLineStyleIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidStyleIdx; + mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; + mainObject.customProjectionIndex = (needsCustomProjection) ? acquireActiveCustomProjectionIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomProjectionIndex; + mainObject.customClipRectIndex = (needsCustomClipRect) ? 
acquireActiveCustomClipRectIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomClipRectIndex; + mainObject.transformationType = (uint32_t)activeMainObjectTransformationType; + activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); + return activeMainObjectIndex; } -uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineStyleInfo) +uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit) { - LineStyle gpuLineStyle = lineStyleInfo.getAsGPUData(); - _NBL_DEBUG_BREAK_IF(gpuLineStyle.stipplePatternSize > LineStyle::StipplePatternMaxSize); // Oops, even after style normalization the style is too long to be in gpu mem :( - LineStyle* stylesArray = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer()); - for (uint32_t i = 0u; i < currentLineStylesCount; ++i) + uint32_t outLineStyleIdx = addLineStyle_Internal(lineStyle); + if (outLineStyleIdx == InvalidStyleIdx) { - const LineStyle& itr = stylesArray[i]; + // There wasn't enough resource memory remaining to fit a single LineStyle + submitDraws(intendedNextSubmit); + reset(); // resets everything! be careful! 
- if (itr == gpuLineStyle) - return i; + outLineStyleIdx = addLineStyle_Internal(lineStyle); + assert(outLineStyleIdx != InvalidStyleIdx); } - if (currentLineStylesCount >= maxLineStyles) - return InvalidStyleIdx; - - void* dst = stylesArray + currentLineStylesCount; - memcpy(dst, &gpuLineStyle, sizeof(LineStyle)); - return currentLineStylesCount++; + return outLineStyleIdx; } -uint64_t DrawResourcesFiller::acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit) { - if (clipProjectionAddresses.empty()) - return InvalidClipProjectionAddress; + // before calling `addDTMSettings_Internal` we have made sure we have enough mem for + uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + if (outDTMSettingIdx == InvalidDTMSettingsIdx) + { + // There wasn't enough resource memory remaining to fit dtmsettings struct + 2 linestyles structs. + submitDraws(intendedNextSubmit); + reset(); // resets everything! be careful! 
- if (clipProjectionAddresses.back() == InvalidClipProjectionAddress) - clipProjectionAddresses.back() = addClipProjectionData_SubmitIfNeeded(clipProjections.back(), intendedNextSubmit); - - return clipProjectionAddresses.back(); + outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + assert(outDTMSettingIdx != InvalidDTMSettingsIdx); + } + return outDTMSettingIdx; } -uint64_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit) { - uint64_t outClipProjectionAddress = addClipProjectionData_Internal(clipProjectionData); - if (outClipProjectionAddress == InvalidClipProjectionAddress) + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t memRequired = sizeof(float64_t3x3); + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more custom projection? + + if (!enoughMem) { - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - - resetGeometryCounters(); - resetMainObjectCounters(); - - outClipProjectionAddress = addClipProjectionData_Internal(clipProjectionData); - assert(outClipProjectionAddress != InvalidClipProjectionAddress); + reset(); // resets everything! be careful! 
} - return outClipProjectionAddress; + + resourcesCollection.customProjections.vector.push_back(projection); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.customProjections.vector.size() - 1u; } -uint64_t DrawResourcesFiller::addClipProjectionData_Internal(const ClipProjectionData& clipProjectionData) +uint32_t DrawResourcesFiller::addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit) { - const uint64_t maxGeometryBufferClipProjData = (maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ClipProjectionData); - if (maxGeometryBufferClipProjData <= 0) - return InvalidClipProjectionAddress; - - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dst, &clipProjectionData, sizeof(ClipProjectionData)); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t memRequired = sizeof(WorldClipRect); + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more custom clip rect? - const uint64_t ret = currentGeometryBufferSize + geometryBufferAddress; - currentGeometryBufferSize += sizeof(ClipProjectionData); - return ret; + if (!enoughMem) + { + submitDraws(intendedNextSubmit); + reset(); // resets everything! be careful! 
+ } + + resourcesCollection.customClipRects.vector.push_back(clipRect); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.customClipRects.vector.size() - 1u; } void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) @@ -738,39 +1909,49 @@ void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyl void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx) { - const uint32_t maxGeometryBufferConnectors = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(PolylineConnector)); - - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferConnectors); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(PolylineConnector) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + const uint32_t connectorCount = static_cast(polyline.getConnectors().size()); const uint32_t remainingObjects = connectorCount - currentPolylineConnectorObj; - const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto connectorsByteSize = sizeof(PolylineConnector) * objectsToUpload; + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(connectorsByteSize, alignof(PolylineConnector)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const 
PolylineConnector& connector = polyline.getConnectors()[currentPolylineConnectorObj]; + memcpy(dst, &connector, connectorsByteSize); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::POLYLINE_CONNECTOR) | 0 << 16); - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(PolylineConnector); - } - - // Add Geometry - if (objectsToUpload > 0u) - { - const auto connectorsByteSize = sizeof(PolylineConnector) * objectsToUpload; - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& connector = polyline.getConnectors()[currentPolylineConnectorObj]; - memcpy(dst, &connector, connectorsByteSize); - currentGeometryBufferSize += connectorsByteSize; - } + } currentPolylineConnectorObj += objectsToUpload; } @@ -780,154 
+1961,535 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const assert(section.count >= 1u); assert(section.type == ObjectType::LINE); - const uint32_t maxGeometryBufferPoints = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(LinePointInfo)); - const uint32_t maxGeometryBufferLines = (maxGeometryBufferPoints <= 1u) ? 0u : maxGeometryBufferPoints - 1u; - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferLines); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + if (remainingResourcesSize < sizeof(LinePointInfo)) + return; + + // how many lines fit into mem? --> memConsumption = sizeof(LinePointInfo) + sizeof(LinePointInfo)*lineCount + sizeof(DrawObject)*lineCount + sizeof(uint32_t) * 6u * lineCount + const uint32_t uploadableObjects = (remainingResourcesSize - sizeof(LinePointInfo)) / (sizeof(LinePointInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex const uint32_t lineCount = section.count; const uint32_t remainingObjects = lineCount - currentObjectInSection; - uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto pointsByteSize = sizeof(LinePointInfo) * (objectsToUpload + 1u); + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(pointsByteSize, alignof(LinePointInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const LinePointInfo& linePoint = polyline.getLinePointAt(section.index + currentObjectInSection); + memcpy(dst, &linePoint, pointsByteSize); + + // Push Indices, remove 
later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::LINE) | 0 << 16); - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(LinePointInfo); - } - - // Add Geometry - if (objectsToUpload > 0u) - { - const auto pointsByteSize = sizeof(LinePointInfo) * (objectsToUpload + 1u); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& linePoint = polyline.getLinePointAt(section.index + currentObjectInSection); - memcpy(dst, &linePoint, pointsByteSize); - currentGeometryBufferSize += pointsByteSize; - } + } currentObjectInSection += objectsToUpload; } void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) { - constexpr 
uint32_t CagesPerQuadBezier = getCageCountPerPolylineObject(ObjectType::QUAD_BEZIER); + constexpr uint32_t CagesPerQuadBezier = 3u; // TODO: Break into 3 beziers in compute shader. + assert(section.type == ObjectType::QUAD_BEZIER); - const uint32_t maxGeometryBufferBeziers = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(QuadraticBezierInfo)); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + // how many quad bezier objects fit into mem? + // memConsumption = quadBezCount * (sizeof(QuadraticBezierInfo) + 3*(sizeof(DrawObject)+6u*sizeof(uint32_t)) + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(QuadraticBezierInfo) + (sizeof(DrawObject) + 6u * sizeof(uint32_t)) * CagesPerQuadBezier); + // TODO[ERFAN]: later take into account: our maximum indexable vertex - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferBeziers); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects /= CagesPerQuadBezier; - const uint32_t beziersCount = section.count; const uint32_t remainingObjects = beziersCount - currentObjectInSection; - uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t cagesCount = objectsToUpload * CagesPerQuadBezier; + + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(beziersByteSize, alignof(QuadraticBezierInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const QuadraticBezierInfo& quadBezier = polyline.getQuadBezierInfoAt(section.index + currentObjectInSection); + memcpy(dst, &quadBezier, beziersByteSize); + + 
+ // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u*cagesCount); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < cagesCount; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(cagesCount); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { for (uint16_t subObject = 0; subObject < CagesPerQuadBezier; subObject++) { drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::QUAD_BEZIER) | (subObject << 16)); - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i * CagesPerQuadBezier + subObject] = drawObj; } drawObj.geometryAddress += sizeof(QuadraticBezierInfo); } - // Add Geometry - if (objectsToUpload > 0u) - { - const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& quadBezier = polyline.getQuadBezierInfoAt(section.index + currentObjectInSection); - memcpy(dst, &quadBezier, beziersByteSize); - currentGeometryBufferSize += beziersByteSize; - } currentObjectInSection += objectsToUpload; } void DrawResourcesFiller::addHatch_Internal(const 
Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex) { - const uint32_t maxGeometryBufferHatchBoxes = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(Hatch::CurveHatchBox)); - - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferHatchBoxes); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(Hatch::CurveHatchBox) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + uint32_t remainingObjects = hatch.getHatchBoxCount() - currentObjectInSection; - uploadableObjects = core::min(uploadableObjects, remainingObjects); - - for (uint32_t i = 0; i < uploadableObjects; i++) - { - const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(i + currentObjectInSection); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); - uint64_t hatchBoxAddress; - { - static_assert(sizeof(CurveBox) == sizeof(Hatch::CurveHatchBox)); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dst, &hatchBox, sizeof(CurveBox)); - hatchBoxAddress = geometryBufferAddress + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(CurveBox); - } + if (objectsToUpload <= 0u) + return; - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::CURVE_BOX) | (0 << 16)); - drawObj.mainObjIndex = mainObjIndex; - drawObj.geometryAddress = hatchBoxAddress; - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount + i; - memcpy(dst, &drawObj, sizeof(DrawObject)); + // Add Geometry + static_assert(sizeof(CurveBox) == sizeof(Hatch::CurveHatchBox)); + const 
auto curveBoxesByteSize = sizeof(Hatch::CurveHatchBox) * objectsToUpload; + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(curveBoxesByteSize, alignof(Hatch::CurveHatchBox)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(currentObjectInSection); // WARNING: This is assuming hatch boxes are contiguous in memory, TODO: maybe make that more obvious through Hatch interface + memcpy(dst, &hatchBox, curveBoxesByteSize); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIndex; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::CURVE_BOX) | (0 << 16)); + drawObj.geometryAddress = geometryBufferOffset; + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + drawObjectsToBeFilled[i] = drawObj; + drawObj.geometryAddress += sizeof(Hatch::CurveHatchBox); } // Add Indices - currentDrawObjectCount += uploadableObjects; currentObjectInSection += uploadableObjects; } bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx) { - const uint32_t maxGeometryBufferFontGlyphs = static_cast((maxGeometryBufferSize - 
currentGeometryBufferSize) / sizeof(GlyphInfo)); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GlyphInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferFontGlyphs); + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GlyphInfo), alignof(GlyphInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &glyphInfo, sizeof(GlyphInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::FONT_GLYPH) | (0 << 16)); + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +bool DrawResourcesFiller::addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx) +{ + const size_t 
remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GridDTMInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GridDTMInfo), alignof(GridDTMInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &gridDTMInfo, sizeof(GridDTMInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::GRID_DTM) | (0 << 16)); + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +bool DrawResourcesFiller::addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(ImageObjectInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable 
vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(ImageObjectInfo), alignof(ImageObjectInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &imageObjectInfo, sizeof(ImageObjectInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::STATIC_IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +bool DrawResourcesFiller::addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GeoreferencedImageInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = 
resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GeoreferencedImageInfo), alignof(GeoreferencedImageInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &georeferencedImageInfo, sizeof(GeoreferencedImageInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::STREAMED_IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +uint32_t DrawResourcesFiller::getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t textureIdx = InvalidTextureIndex; + CachedImageRecord* imageRef = imagesCache->get(imageID); + if (imageRef) + { + textureIdx = imageRef->arrayIndex; + imageRef->lastUsedFrameIndex = currentFrameIndex; // update this because the texture will get used on the next frame + } + return textureIdx; +} - if (uploadableObjects >= 1u) +void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit) +{ + if (evicted.arrayIndex == InvalidTextureIndex) { - void* 
geomDst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(geomDst, &glyphInfo, sizeof(GlyphInfo)); - uint64_t fontGlyphAddr = geometryBufferAddress + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(GlyphInfo); + m_logger.log("evictImage_SubmitIfNeeded: `evicted.arrayIndex == InvalidTextureIndex` is true, shouldn't happen under normal circumstances.", nbl::system::ILogger::ELL_WARNING); + _NBL_DEBUG_BREAK_IF(true); + return; + } + // Later used to release the image's memory range. + core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); + cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; + cleanupObject->addr = evicted.allocationOffset; + cleanupObject->size = evicted.allocationSize; - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::FONT_GLYPH) | (0 << 16)); - drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = fontGlyphAddr; - void* drawObjDst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(drawObjDst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); - return true; + // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. + if (imageUsedForNextIntendedSubmit) + { + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. 
+ submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); } else { - return false; + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value conservatively, which is the immediate prev submit. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } +} + +DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded( + const nbl::asset::IImage::SCreationParams& imageParams, + const asset::E_FORMAT imageViewFormatOverride, + nbl::video::SIntendedSubmitInfo& intendedNextSubmit, + std::string imageDebugName) +{ + ImageAllocateResults ret = {}; + + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + bool alreadyBlockedForDeferredFrees = false; + + // Attempt to create a GPU image and corresponding image view for this texture. 
+ // If creation or memory allocation fails (likely due to VRAM exhaustion), + // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-cachedImageRecord image remains. + while (imagesCache->size() > 0u) + { + // Try creating the image and allocating memory for it: + nbl::video::IGPUImage::SCreationParams params = {}; + params = imageParams; + + if (imageViewFormatOverride != asset::E_FORMAT::EF_COUNT && imageViewFormatOverride != imageParams.format) + { + // TODO: figure out why this crashes the app + //params.viewFormats.set(static_cast(imageViewFormatOverride), true); + params.flags |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + } + auto gpuImage = device->createImage(std::move(params)); + + if (gpuImage) + { + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuImageMemoryRequirements = gpuImage->getMemoryReqs(); + uint32_t actualAlignment = 1u << gpuImageMemoryRequirements.alignmentLog2; + const bool imageMemoryRequirementsMatch = + (physDev->getDeviceLocalMemoryTypeBits() & gpuImageMemoryRequirements.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuImageMemoryRequirements.requiresDedicatedAllocation == false) && // should not require dedicated allocation + ((ImagesMemorySubAllocator::MaxMemoryAlignment % actualAlignment) == 0u); // should be consistent with our suballocator's max alignment + + if (imageMemoryRequirementsMatch) + { + ret.allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); + const bool allocationFromImagesMemoryArenaSuccessfull = ret.allocationOffset != ImagesMemorySubAllocator::InvalidAddress; + if (allocationFromImagesMemoryArenaSuccessfull) + { + ret.allocationSize = gpuImageMemoryRequirements.size; + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = gpuImage.get(), + .binding = { .memory = imagesMemoryArena.memory.get(), 
.offset = imagesMemoryArena.offset + ret.allocationOffset } + }; + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + gpuImage->setObjectDebugName(imageDebugName.c_str()); + IGPUImageView::SCreationParams viewParams = { + .image = gpuImage, + .viewType = IGPUImageView::ET_2D, + .format = (imageViewFormatOverride == asset::E_FORMAT::EF_COUNT) ? gpuImage->getCreationParameters().format : imageViewFormatOverride + }; + ret.gpuImageView = device->createImageView(std::move(viewParams)); + if (ret.gpuImageView) + { + // SUCCESS! + ret.gpuImageView->setObjectDebugName((imageDebugName + " View").c_str()); + } + else + { + // irrecoverable error if simple image creation fails. + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: gpuImageView creation failed, that's rare and irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + + // succcessful with everything, just break and get out of this retry loop + break; + } + else + { + // irrecoverable error if simple bindImageMemory fails. 
+ m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: bindImageMemory failed, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + } + else + { + // printf(std::format("Allocation Failed, Trying again, ImageID={} Size={} \n", imageID, gpuImageMemoryRequirements.size).c_str()); + // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry + } + } + else + { + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: memory requirements of the gpu image doesn't match our preallocated device memory, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + } + else + { + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: gpuImage creation failed, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + + // Getting here means we failed creating or allocating the image, evict and retry. + + + // If imageCache size is 1 it means there is nothing else to evict, but there may still be evictions/frees already queued up. + // `cull_frees` will make sure all pending deallocations will be blocked for. + if (imagesCache->size() == 1u && alreadyBlockedForDeferredFrees) + { + // We give up, there's really nothing we can do: no image to evict (imagesCache->size() == 1) and no more memory to free up (alreadyBlockedForDeferredFrees). + // We probably have evicted almost every other texture except the one we just allocated an index for. 
+ // This is most likely due to current image memory requirement being greater than the whole memory allocated for all images + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: failed allocating an image, there is nothing more from mcache to evict, the current memory requirement is simply greater than the whole memory allocated for all images.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + + if (imagesCache->size() > 1u) + { + const image_id evictionCandidate = imagesCache->select_eviction_candidate(); + CachedImageRecord* imageRef = imagesCache->peek(evictionCandidate); + if (imageRef) + evictImage_SubmitIfNeeded(evictionCandidate, *imageRef, intendedNextSubmit); + imagesCache->erase(evictionCandidate); + } + + while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. + alreadyBlockedForDeferredFrees = true; + + // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference + // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. + } + + return ret; +} + +void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams) +{ + // Decide whether the image can reside fully into memory rather than get streamed. 
+ // TODO: Improve logic, currently just a simple check to see if the full-resolution image has more pixels than the viewport or not + // TODO: add a criterion that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) + const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; + + if (betterToResideFullyInMem) + outImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + else + outImageType = ImageType::GEOREFERENCED_STREAMED; + + outImageParams.type = asset::IImage::ET_2D; + outImageParams.samples = asset::IImage::ESCF_1_BIT; + outImageParams.format = georeferencedImageParams.format; + + if (outImageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + { + outImageParams.extent = { georeferencedImageParams.imageExtents.x, georeferencedImageParams.imageExtents.y, 1u }; + } + else + { + // TODO: Better Logic, area around the view, etc... 
+ outImageParams.extent = { georeferencedImageParams.viewportExtents.x, georeferencedImageParams.viewportExtents.y, 1u }; } + + + outImageParams.mipLevels = 1u; // TODO: Later do mipmapping + outImageParams.arrayLayers = 1u; } void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) @@ -940,45 +2502,94 @@ void DrawResourcesFiller::setHatchFillMSDFTextureFunction(const GetHatchFillPatt getHatchFillPatternMSDF = func; } -uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue) +{ + currentFrameIndex++; + // TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index + // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphore value will signal the completion of the (to be evicted) resource's usage +} + +uint32_t DrawResourcesFiller::getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t textureIdx = InvalidTextureIndex; + MSDFReference* tRef = msdfLRUCache->get(msdfInfo); + if (tRef) + { + textureIdx = tRef->alloc_idx; + tRef->lastUsedFrameIndex = currentFrameIndex; // update this because the texture will get used on the next frame + } + return textureIdx; +} + +uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { if (!cpuImage) - return InvalidTextureIdx; // TODO: Log + { + m_logger.log("addMSDFTexture: cpuImage is nullptr.", nbl::system::ILogger::ELL_ERROR); + return InvalidTextureIndex; + } const auto cpuImageSize = cpuImage->getMipSize(0); const bool sizeMatch = cpuImageSize.x == getMSDFResolution().x && 
cpuImageSize.y == getMSDFResolution().y && cpuImageSize.z == 1u; if (!sizeMatch) - return InvalidTextureIdx; // TODO: Log - - // TextureReferences hold the semaValue related to the "scratch semaphore" in IntendedSubmitInfo - // Every single submit increases this value by 1 - // The reason for hiolding on to the lastUsedSema is deferred dealloc, which we call in the case of eviction, making sure we get rid of the entry inside the allocator only when the texture is done being used - const auto nextSemaSignal = intendedNextSubmit.getFutureScratchSemaphore(); + { + m_logger.log("addMSDFTexture: cpuImage size doesn't match with msdf array image.", nbl::system::ILogger::ELL_ERROR); + return InvalidTextureIndex; + } + /* + * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. + * When all slots are occupied, the least recently used entry is evicted via `msdfLRUCache`. + * This callback is invoked on eviction, and must: + * - Ensure safe deallocation of the slot. + * - Submit any pending draw calls if the evicted MSDF was scheduled to be used in the upcoming submission. + */ auto evictionCallback = [&](const MSDFReference& evicted) { - if (msdfTextureArrayIndicesUsed.contains(evicted.alloc_idx)) + // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. + // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: + // - Both the image upload (msdfImagesState) and usage occur within the same timeline (`intendedNextSubmit`). + // - timeline semaphores guarantee proper ordering: the next submit's msdfImagesState will wait on the prior usage. + // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. 
+ // + // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images + // outside the `intendedNextSubmit` timeline for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. + + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); + + if (imageUsedForNextIntendedSubmit) { - // Dealloc once submission is finished - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, nextSemaSignal); - - // If we reset main objects will cause an auto submission bug, where adding an msdf texture while constructing glyphs will have wrong main object references (See how SingleLineTexts add Glyphs with a single mainObject) - // for the same reason we don't reset line styles - // `submitCurrentObjectsAndReset` function handles the above + updating clipProjectionData and making sure the mainObjectIdx references to the correct clipProj data after reseting geometry buffer - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. 
+ ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); } else { - // We didn't use it this frame, so it's safe to dealloc now, withou needing to "overflow" submit - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx); + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit (scratchSemaphore.value). + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); } + + // Clear CPU-side metadata associated with the evicted slot. 
+ msdfImagesState[evicted.alloc_idx].evict(); }; // We pass nextSemaValue instead of constructing a new MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema - MSDFReference* inserted = msdfLRUCache->insert(msdfInput, nextSemaSignal.value, evictionCallback); + MSDFReference* inserted = msdfLRUCache->insert(msdfInput, currentFrameIndex, evictionCallback); - // if inserted->alloc_idx was not InvalidTextureIdx then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx - if (inserted->alloc_idx == InvalidTextureIdx) + inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + + // if cachedImageRecord->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx + if (inserted->alloc_idx == InvalidTextureIndex) { // New insertion == cache miss happened and insertion was successfull inserted->alloc_idx = IndexAllocator::AddressAllocator::invalid_address; @@ -986,19 +2597,31 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor if (inserted->alloc_idx != IndexAllocator::AddressAllocator::invalid_address) { - // We queue copy and finalize all on `finalizeTextureCopies` function called before draw calls to make sure it's in mem - msdfTextureCopies.push_back({ .image = std::move(cpuImage), .index = inserted->alloc_idx }); + // We stage msdfImagesState, pushMSDFImagesUploads will push it into GPU + msdfImagesState[inserted->alloc_idx].image = std::move(cpuImage); + msdfImagesState[inserted->alloc_idx].uploadedToGPU = false; } else { - // TODO: log here, assert will be called in a few lines - inserted->alloc_idx = InvalidTextureIdx; + m_logger.log("addMSDFTexture: index allocation failed.", nbl::system::ILogger::ELL_ERROR); 
+ inserted->alloc_idx = InvalidTextureIndex; } } - assert(inserted->alloc_idx != InvalidTextureIdx); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed - if (inserted->alloc_idx != InvalidTextureIdx) - msdfTextureArrayIndicesUsed.emplace(inserted->alloc_idx); + assert(inserted->alloc_idx != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed return inserted->alloc_idx; +} + +void DrawResourcesFiller::flushDrawObjects() +{ + if (resourcesCollection.drawObjects.getCount() > drawObjectsFlushedToDrawCalls) + { + DrawCallData drawCall = {}; + drawCall.isDTMRendering = false; + drawCall.drawObj.drawObjectStart = drawObjectsFlushedToDrawCalls; + drawCall.drawObj.drawObjectCount = resourcesCollection.drawObjects.getCount() - drawObjectsFlushedToDrawCalls; + drawCalls.push_back(drawCall); + drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); + } } \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index e20514651..1a74338e7 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -1,11 +1,13 @@ #pragma once #include "Polyline.h" +#include "CTriangleMesh.h" #include "Hatch.h" #include "IndexAllocator.h" +#include "Images.h" #include #include #include - +// #include using namespace nbl; using namespace nbl::video; using namespace nbl::core; @@ -13,20 +15,8 @@ using namespace nbl::asset; using namespace nbl::ext::TextRendering; static_assert(sizeof(DrawObject) == 16u); -static_assert(sizeof(MainObject) == 16u); -static_assert(sizeof(Globals) == 128u); +static_assert(sizeof(MainObject) == 20u); static_assert(sizeof(LineStyle) == 88u); -static_assert(sizeof(ClipProjectionData) == 88u); - -template -struct DrawBuffers -{ - smart_refctd_ptr 
indexBuffer; // only is valid for IGPUBuffer because it's filled at allocation time and never touched again - smart_refctd_ptr mainObjectsBuffer; - smart_refctd_ptr drawObjectsBuffer; - smart_refctd_ptr geometryBuffer; - smart_refctd_ptr lineStylesBuffer; -}; // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. @@ -37,27 +27,149 @@ struct DrawBuffers struct DrawResourcesFiller { public: + + // We pack multiple data types in a single buffer, we need to makes sure each offset starts aligned to avoid mis-aligned accesses + static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; + static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB - typedef uint32_t index_buffer_type; + /// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources + struct ResourceBase + { + static constexpr size_t InvalidBufferOffset = ~0u; + size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued + virtual size_t getCount() const = 0; + virtual size_t getStorageSize() const = 0; + virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), GPUStructsMaxNaturalAlignment); } + }; - DrawResourcesFiller(); + /// @brief ResourceBase reserved for compute shader stages input/output + template + struct ReservedComputeResource : ResourceBase + { + size_t count = 0ull; + size_t getCount() const override { return count; } + size_t getStorageSize() const override { return count * sizeof(T); } + }; - DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue); + /// @brief ResourceBase which is filled by CPU, packed and sent to GPU + template + struct CPUGeneratedResource : ResourceBase + { + core::vector vector; + size_t getCount() const { return vector.size(); } + size_t getStorageSize() const { return vector.size() * sizeof(T); } + + /// @return pointer to start of the data to be filled, up to additionalCount + T* 
increaseCountAndGetPtr(size_t additionalCount) + { + size_t offset = vector.size(); + vector.resize(offset + additionalCount); + return &vector[offset]; + } - typedef std::function SubmitFunc; - void setSubmitDrawsFunction(const SubmitFunc& func); + /// @brief increases size of general-purpose resources that hold bytes + /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector + /// @return pointer to start of the data to be filled, up to additional size + size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) + { + assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment); + size_t offset = core::alignUp(vector.size(), alignment); + vector.resize(offset + additionalSize); + return offset; + } + + uint32_t addAndGetOffset(const T& val) + { + vector.push_back(val); + return vector.size() - 1u; + } - void allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t indices); + T* data() { return vector.data(); } + }; + + /// @brief struct to hold all resources + // TODO: rename to staged resources buffers or something like that + struct ResourcesCollection + { + // auto-submission level 0 resources (settings that mainObj references) + CPUGeneratedResource lineStyles; + CPUGeneratedResource dtmSettings; + CPUGeneratedResource customProjections; + CPUGeneratedResource customClipRects; + + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) + CPUGeneratedResource mainObjects; + + // auto-submission level 2 buffers + CPUGeneratedResource drawObjects; + CPUGeneratedResource indexBuffer; // TODO: this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders + CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom data for geometries (eg. 
line points, bezier definitions, aabbs) + + // Get Total memory consumption, If all ResourcesCollection get packed together with GPUStructsMaxNaturalAlignment + // used to decide the remaining memory and when to overflow + size_t calculateTotalConsumption() const + { + return + lineStyles.getAlignedStorageSize() + + dtmSettings.getAlignedStorageSize() + + customProjections.getAlignedStorageSize() + + customClipRects.getAlignedStorageSize() + + mainObjects.getAlignedStorageSize() + + drawObjects.getAlignedStorageSize() + + indexBuffer.getAlignedStorageSize() + + geometryInfo.getAlignedStorageSize(); + } + }; + + DrawResourcesFiller(); - void allocateMainObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t mainObjects); + DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); - void allocateDrawObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t drawObjects); + typedef std::function SubmitFunc; + void setSubmitDrawsFunction(const SubmitFunc& func); + + // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding + void setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding); - void allocateGeometryBuffer(ILogicalDevice* logicalDevice, size_t size); + /// @brief Get minimum required size for resources buffer (containing objects and geometry info and their settings) + static constexpr size_t getMinimumRequiredResourcesBufferSize() + { + // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer + // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) + return core::alignUp(sizeof(LineStyle) + sizeof(LineStyle) * DTMSettings::MaxContourSettings + sizeof(DTMSettings) + sizeof(WorldClipRect) + sizeof(float64_t3x3) + 
sizeof(MainObject) + sizeof(DrawObject) + 512ull, GPUStructsMaxNaturalAlignment); + } - void allocateStylesBuffer(ILogicalDevice* logicalDevice, uint32_t lineStylesCount); + /** + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. + */ + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize); - void allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); + /** + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. 
+ * @return true if the allocation succeeded at any iteration; false if all attempts failed. + */ + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); + + bool allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); // functions that user should set to get MSDF texture if it's not available in cache. // it's up to user to return cached or generate on the fly. @@ -66,6 +178,13 @@ struct DrawResourcesFiller void setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func); void setHatchFillMSDFTextureFunction(const GetHatchFillPatternMSDFTextureFunc& func); + // Must be called at the end of each frame. + // right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources. + // Registers the semaphore/value that will signal completion of this frame's draw, + // This allows future frames to safely deallocate or evict resources used in the current frame by waiting on this signal before reuse or destruction. + // `drawSubmitWaitValue` should reference the wait value of the draw submission finishing this frame using the `intendedNextSubmit`; + void markFrameUsageComplete(uint64_t drawSubmitWaitValue); + // TODO[Przemek]: try to draft up a `CTriangleMesh` Class in it's own header (like CPolyline), simplest form is basically two cpu buffers (1 array of uint index buffer, 1 array of float64_t3 vertexBuffer) // TODO[Przemek]: Then have a `drawMesh` function here similar to drawXXX's below, this will fit both vertex and index buffer in the `geometryBuffer`. // take a `SIntendedSubmitInfo` like others, but don't use it as I don't want you to handle anything regarding autoSubmit @@ -74,8 +193,19 @@ struct DrawResourcesFiller //! 
this function fills buffers required for drawing a polyline and submits a draw through provided callback when there is not enough memory. void drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit); - void drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + //! Draws a fixed-geometry polyline using a custom transformation. + //! TODO: Change `polyline` input to an ID referencing a possibly cached instance in our buffers, allowing reuse and avoiding redundant uploads. + void drawFixedGeometryPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); + + /// Use this in a begin/endMainObject scope when you want to draw different polylines that should essentially be a single main object (no self-blending between components of a single main object) + /// WARNING: make sure this function is called within begin/endMainObject scope + void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); + void drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit); + // ! Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( const Hatch& hatch, @@ -96,8 +226,36 @@ struct DrawResourcesFiller const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit); + + //! Convinience function for fixed-geometry Hatch with MSDF Pattern and a solid background + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); + + // ! 
Fixed-geometry Hatch with MSDF Pattern + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); - // ! Draw Font Glyph, will auto submit if there is no space + // ! Solid Fill Fixed-geometry Hatch + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); + + /// Used by SingleLineText, Issue drawing a font glyph + /// WARNING: make sure this function is called within begin/endMainObject scope void drawFontGlyph( nbl::ext::TextRendering::FontFace* fontFace, uint32_t glyphIdx, @@ -105,113 +263,142 @@ struct DrawResourcesFiller float32_t2 dirU, float32_t aspectRatio, float32_t2 minUV, - uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); - - void _test_addImageObject( - float64_t2 topLeftPos, - float32_t2 size, - float32_t rotation, - SIntendedSubmitInfo& intendedNextSubmit) - { - auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool - { - const uint32_t maxGeometryBufferImageObjects = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ImageObjectInfo)); - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferImageObjects); - - if (uploadableObjects >= 1u) - { - void* dstGeom = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dstGeom, &imageObjectInfo, sizeof(ImageObjectInfo)); - uint64_t geomBufferAddr = geometryBufferAddress + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(ImageObjectInfo); - - DrawObject drawObj = 
{}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function - drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = geomBufferAddr; - void* dstDrawObj = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dstDrawObj, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; - - return true; - } - else - return false; - }; - - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(InvalidStyleIdx, intendedNextSubmit); - - ImageObjectInfo info = {}; - info.topLeft = topLeftPos; - info.dirU = float32_t2(size.x * cos(rotation), size.x * sin(rotation)); // - info.aspectRatio = size.y / size.x; - info.textureID = 0u; - if (!addImageObject_Internal(info, mainObjIdx)) - { - // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addImageObject_Internal(info, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object - } - } - - bool finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - - inline uint32_t getLineStyleCount() const { return currentLineStylesCount; } - inline uint32_t getDrawObjectCount() const { return currentDrawObjectCount; } + void drawGridDTM(const float64_t2& topLeft, + float64_t2 worldSpaceExtents, + float gridCellWidth, + uint64_t textureID, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit, + const bool drawGridOnly = false); + + /** + * @brief Adds a static 2D image to the draw resource set for rendering. + * + * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. 
+ * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. + * + * If the image is already cached and its slot is valid, it returns true; + * Otherwise, it performs the following: + * - Allocates a new descriptor set slot. + * - Promotes the image format to be GPU-compatible. + * - Creates a GPU image and GPU image view. + * - Queues the image for uploading via staging in the next submit. + * - If memory is constrained, attempts to evict other images to free up space. + * + * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. + * @param staticImage::forceUpdate If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. + * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers + * a flush of pending draws to preserve correctness. + * + * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. + * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. + * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. + */ + bool ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Ensures that multiple static 2D images are resident and ready for rendering. + * + * Attempts to make all provided static images GPU-resident by calling `ensureStaticImageAvailability` + * for each. 
Afterward, it verifies that none of the newly ensured images have been evicted, + * which could happen due to limited VRAM or memory fragmentation. + * + * This function is expected to succeed if: + * - The number of images does not exceed `ImagesBindingArraySize`. + * - Each image individually fits into the image memory arena. + * - There is enough VRAM to hold all images simultaneously. + * + * @param staticImages A span of StaticImageInfo structures describing the images to be ensured. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @return true If all images were successfully made resident and none were evicted during the process. + * @return false If: + * - The number of images exceeds the descriptor binding array size. + * - Any individual image could not be made resident (e.g., larger than the allocator can support). + * - Some images were evicted due to VRAM pressure or allocator fragmentation, in which case Clearing the image cache and retrying MIGHT be a success (TODO: handle internally) + */ + bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. + * + * If the specified image ID is not already present in the cache, or if the cached version is incompatible + * with the requested parameters (e.g. extent, format, or type), this function allocates GPU memory, + * creates the image and its view, to be bound to a descriptor binding in the future. + * + * If the image already exists and matches the requested parameters, its usage metadata is updated. + * In either case, the cache is updated to reflect usage in the current frame. + * + * This function also handles automatic eviction of old images via an LRU policy when space is limited. + * + * @param imageID Unique identifier of the image to add or reuse. 
+ * @param params Georeferenced Image Params + * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. + * + * @return true if the image was successfully cached and is ready for use; false if allocation failed. + * [TODO]: should be internal protected member function. + */ + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + + // [TODO]: should be internal protected member function. + bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); + + // This function must be called immediately after `addStaticImage` for the same imageID. + void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); + + // This function must be called immediately after `addStaticImage` for the same imageID. + void addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); - inline uint32_t getMainObjectCount() const { return currentMainObjectCount; } + /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU + /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. 
+ bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); - inline size_t getCurrentMainObjectsBufferSize() const + /// @brief resets staging buffers and images + void reset() { - return sizeof(MainObject) * currentMainObjectCount; + resetDrawObjects(); + resetMainObjects(); + resetCustomProjections(); + resetCustomClipRects(); + resetLineStyles(); + resetDTMSettings(); + + drawObjectsFlushedToDrawCalls = 0ull; + drawCalls.clear(); } - inline size_t getCurrentDrawObjectsBufferSize() const - { - return sizeof(DrawObject) * currentDrawObjectCount; - } + /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders + const ResourcesCollection& getResourcesCollection() const; - inline size_t getCurrentGeometryBufferSize() const - { - return currentGeometryBufferSize; - } + /// @brief buffer containing all non-texture type resources + nbl::core::smart_refctd_ptr getResourcesGPUBuffer() const { return resourcesGPUBuffer; } - inline size_t getCurrentLineStylesBufferSize() const - { - return sizeof(LineStyle) * currentLineStylesCount; - } + /// @return how far resourcesGPUBuffer was copied to by `pushAllUploads` in `resourcesCollection` + const size_t getCopiedResourcesSize() { return copiedResourcesSize; } - void reset() - { - resetGeometryCounters(); - resetMainObjectCounters(); - resetLineStyleCounters(); - } + // Setting Active Resources: + void setActiveLineStyle(const LineStyleInfo& lineStyle); + /** + * @param disableHeightRelatedDTMModes disables E_DTM_MODE::CONTOUR and E_DTM_MODE::HEIGHT_SHADING, necessary when we want to draw a grid DTM without using a height map texture + */ + void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo, const bool disableHeightRelatedDTMModes = false); - DrawBuffers cpuDrawBuffers; - DrawBuffers gpuDrawBuffers; + void beginMainObject(MainObjectType type, TransformationType transformationType = 
TransformationType::TT_NORMAL); + void endMainObject(); - uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); + void pushCustomProjection(const float64_t3x3& projection); + void popCustomProjection(); - // TODO[Przemek]: Read after reading the fragment shader comments and having a basic understanding of the relationship between "mainObject" and our programmable blending resolve: - // Use `addMainObject_SubmitIfNeeded` to push your single mainObject you'll be using for the enitre triangle mesh (this will ensure overlaps between triangles of the same mesh is resolved correctly) - // Delete comment when you understand this - - // [ADVANCED] Do not use this function unless you know what you're doing (It may cause auto submit) - // Never call this function multiple times in a row before indexing it in a drawable, because future auto-submits may invalidate mainObjects, so do them one by one, for example: - // Valid: addMainObject1 --> addXXX(mainObj1) ---> addMainObject2 ---> addXXX(mainObj2) .... - // Invalid: addMainObject1 ---> addMainObject2 ---> addXXX(mainObj1) ---> addXXX(mainObj2) .... 
- uint32_t addMainObject_SubmitIfNeeded(uint32_t styleIdx, SIntendedSubmitInfo& intendedNextSubmit); + void pushCustomClipRect(const WorldClipRect& clipRect); + void popCustomClipRect(); - // we need to store the clip projection stack to make sure the front is always available in memory - void pushClipProjectionData(const ClipProjectionData& clipProjectionData); - void popClipProjectionData(); - const std::deque& getClipProjectionStack() const { return clipProjections; } + const std::deque& getCustomProjectionStack() const { return activeProjections; } + const std::deque& getCustomClipRectsStack() const { return activeClipRects; } smart_refctd_ptr getMSDFsTextureArray() { return msdfTextureArray; } @@ -223,100 +410,317 @@ struct DrawResourcesFiller return msdfTextureArray->getCreationParameters().image->getCreationParameters().mipLevels; } -protected: - - struct MSDFTextureCopy + /// For advanced use only (passed to shaders for them to know if we overflow-submitted in the middle of a main obj) + uint32_t getActiveMainObjectIndex() const; + + struct MSDFImageState { core::smart_refctd_ptr image; - uint32_t index; + bool uploadedToGPU : 1u; + + bool isValid() const { return image.get() != nullptr; } + void evict() + { + image = nullptr; + uploadedToGPU = false; + } }; - SubmitFunc submitDraws; - - bool finalizeMainObjectCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + // NOTE: Most probably Going to get removed soon with a single draw call in GPU-driven rendering + struct DrawCallData + { + union + { + struct Dtm + { + uint64_t indexBufferOffset; + uint64_t indexCount; + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; + } dtm; + struct DrawObj + { + uint64_t drawObjectStart = 0ull; + uint64_t drawObjectCount = 0ull; + } drawObj; + }; + bool isDTMRendering; + }; - bool finalizeGeometryCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + const std::vector& getDrawCalls() const; + + /// @brief Stores all CPU-side resources 
that were staged and prepared for a single GPU submission. + /// + /// *** This cache includes anything used or referenced from DrawResourcesFiller in the Draw Submit: + /// - Buffer data (geometry, indices, etc.) + /// - MSDF CPU images + /// - Draw call metadata + /// - Active MainObject Index --> this is another state of the submit that we need to store + /// + /// The data is fully preprocessed and ready to be pushed to the GPU with no further transformation. + /// This enables efficient replays without traversing or re-generating scene content. + struct ReplayCache + { + std::vector drawCallsData; + ResourcesCollection resourcesCollection; + std::vector msdfImagesState; + std::unique_ptr imagesCache; + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + // TODO: non msdf general CPU Images + // TODO: Get total memory consumption for logging? + }; - bool finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Creates a snapshot of all currently staged CPU-side resourcesCollection for future replay or deferred submission. + /// + /// @warning This cache corresponds to a **single intended GPU submit**. + /// If your frame submission overflows into multiple submits due to staging memory limits or batching, + /// you are responsible for creating **multiple ReplayCache instances**, one per submit. + /// + /// @return A heap-allocated ReplayCache containing a copy of all staged CPU-side resourcesCollection and draw call data. + std::unique_ptr createReplayCache(); + + /// @brief Redirects all subsequent resource upload and getters to use an external ReplayCache. + /// + /// After calling this function, staging, resource getters, and upload mechanisms will pull data from the given ReplayCache + /// instead of the internal accumulation cache. 
+ /// + /// User is responsible for management of cache and making sure it's alive in the ReplayCache scope + void setReplayCache(ReplayCache* cache); - bool finalizeCustomClipProjectionCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - - bool finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Reverts internal logic to use the default internal staging and resource accumulation cache. + /// Must be called once per corresponding `setReplayCache()`. + void unsetReplayCache(); + +protected: - // Internal Function to call whenever we overflow while filling our buffers with geometry (potential limiters: indexBuffer, drawObjectsBuffer or geometryBuffer) - // ! mainObjIdx: is the mainObject the "overflowed" drawObjects belong to. - // mainObjIdx is required to ensure that valid data, especially the `clipProjectionData`, remains linked to the main object. - // This is important because, while other data may change during overflow handling, the main object must persist to maintain consistency throughout rendering all parts of it. (for example all lines and beziers of a single polyline) - // [ADVANCED] If you have not created your mainObject yet, pass `InvalidMainObjectIdx` (See drawHatch) - void submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex); + SubmitFunc submitDraws; - uint32_t addMainObject_Internal(const MainObject& mainObject); + /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. + bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); + + /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. 
+ bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& msdfImagesState); - uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); + /// @brief binds cached images into their correct descriptor set slot if not already resident. + bool bindImagesToArrayIndices(ImagesCache& imagesCache); - // Gets the current clip projection data (the top of stack) gpu addreess inside the geometryBuffer - // If it's been invalidated then it will request to upload again with a possible auto-submit on low geometry buffer memory. - uint64_t acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Records GPU copy commands for all staged images into the active command buffer. + bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache); - uint64_t addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit); + /// @brief copies the queued up streamed copies. + bool pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit); - uint64_t addClipProjectionData_Internal(const ClipProjectionData& clipProjectionData); + const size_t calculateRemainingResourcesSize() const; - static constexpr uint32_t getCageCountPerPolylineObject(ObjectType type) - { - if (type == ObjectType::LINE) - return 1u; - else if (type == ObjectType::QUAD_BEZIER) - return 3u; - return 0u; - }; + /// @brief Internal Function to call whenever we overflow when we can't fill all of mainObject's drawObjects + /// @param intendedNextSubmit + /// @param mainObjectIndex: function updates mainObjectIndex after submitting, clearing everything and acquiring mainObjectIndex again. 
+ void submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t& mainObjectIndex); - void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); + // Gets resource index to the active linestyle data + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active DTM settings data + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); + // Gets resource index to the active projection data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active clip data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active main object data + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + 
/// Attempts to add lineStyle to resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add dtmSettings to resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add custom projection to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add custom clip to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit); + + /// returns index to added LineStyleInfo, returns Invalid index if it exceeds resource limitations + uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); + + /// returns index to added DTMSettingsInfo, returns Invalid index if it exceeds resource limitations + uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Computes the final transformation matrix for fixed geometry rendering, + * considering any active custom projections and the transformation type. + * + * This function handles how a given transformation should be applied depending on the + * current transformation type and the presence of any active projection matrices. + * + * - If no active projection exists, the input transformation is returned unmodified. + * + * - If an active projection exists: + * - For TT_NORMAL, the input transformation is simply multiplied by the top of the projection stack. 
+ * - For TT_FIXED_SCREENSPACE_SIZE, the input transformation is multiplied by the top of the projection stack, + * but the resulting scale is replaced with the screen-space scale from the original input `transformation`. + * + * @param transformation The input 3x3 transformation matrix to apply. + * @param transformationType The type of transformation to apply (e.g., TT_NORMAL or TT_FIXED_SCREENSPACE_SIZE). + * + */ + float64_t3x3 getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const; + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations + void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); + + /// Attempts to upload as many draw objects as possible within the given polyline connectors considering resource limitations + void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addLines_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the given hatch considering resource limitations void addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex); + /// Attempts to upload a single GlyphInfo considering resource limitations bool 
addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); - void resetMainObjectCounters() + /// Attempts to upload a single GridDTMInfo considering resource limitations + bool addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx); + /// Attempts to upload a single image object considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) + bool addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; + + /// Attempts to upload a georeferenced image info considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) + bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx);; + + uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Evicts a GPU image and deallocates its associated descriptor and memory, flushing draws if needed. + * + * This function is called when an image must be removed from GPU memory (typically due to VRAM pressure). + * If the evicted image is scheduled to be used in the next draw submission, a flush is performed to avoid + * use-after-free issues. Otherwise, it proceeds with deallocation immediately. + * + * It prepares a cleanup object that ensures the memory range used by the image will be returned to the suballocator + * only after the GPU has finished using it, guarded by a semaphore wait. + * + * @param imageID The unique ID of the image being evicted. + * @param evicted A reference to the evicted image, containing metadata such as allocation offset, size, usage frame, etc. + * @param intendedNextSubmit Reference to the intended submit information. Used for synchronizing draw submission and safe deallocation. + * + * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. 
[future todo: fix] + */ + void evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit); + + struct ImageAllocateResults { - inMemMainObjectCount = 0u; - currentMainObjectCount = 0u; - } + nbl::core::smart_refctd_ptr gpuImageView = nullptr; + uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; + uint64_t allocationSize = 0ull; + bool isValid() const { return (gpuImageView && (allocationOffset != ImagesMemorySubAllocator::InvalidAddress)); } + }; + + /** + * @brief Attempts to create and allocate a GPU image and its view, with fallback eviction on failure. + * + * This function tries to create a GPU image using the specified creation parameters, allocate memory + * from the shared image memory arena, bind it to device-local memory, and create an associated image view. + * If memory allocation fails (e.g. due to VRAM exhaustion), the function will evict textures from the internal + * LRU cache and retry the operation until successful, or until only the currently-inserted image remains. + * + * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. + * + * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. + * @param imageViewFormatOverride Specifies whether the image view format should differ from the image format. If set to asset::E_FORMAT_ET_COUNT, the image view uses the same format as the image + * @param intendedNextSubmit Reference to the current intended submit info. Used for synchronizing evictions. + * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. + * + * @return ImageAllocateResults A struct containing: + * - `allocationOffset`: Offset into the memory arena (or InvalidAddress on failure). + * - `allocationSize`: Size of the allocated memory region. 
+ * - `gpuImageView`: The created GPU image view (nullptr if creation failed). + */ + ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, + const asset::E_FORMAT imageViewFormatOverride, + nbl::video::SIntendedSubmitInfo& intendedNextSubmit, + std::string imageDebugName); + + /** + * @brief Determines creation parameters for a georeferenced image based on heuristics. + * + * This function decides whether a georeferenced image should be treated as a fully resident GPU texture + * or as a streamable image based on the relationship between its total resolution and the viewport size. + * It then fills out the appropriate Nabla image creation parameters. + * + * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). + * @param[out] outImageType Indicates whether the image should be fully resident or streamed. + * @param[in] georeferencedImageParams Parameters describing the full image extents, viewport extents, and format. 
+ */ + void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams); + + /** + * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter + */ + void drawHatch_impl( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit, + TransformationType transformationType = TransformationType::TT_NORMAL); - // WARN: If you plan to use this, make sure you either reset the mainObjectCounters as well - // Or if you want to keep your mainObject around, make sure you're using the `submitCurrentObjectsAndReset` function instead of calling this directly - // So that it makes your mainObject point to the correct clipProjectionData (which exists in the geometry buffer) - void resetGeometryCounters() + void resetMainObjects() { - inMemDrawObjectCount = 0u; - currentDrawObjectCount = 0u; + resourcesCollection.mainObjects.vector.clear(); + activeMainObjectIndex = InvalidMainObjectIdx; + } - inMemGeometryBufferSize = 0u; - currentGeometryBufferSize = 0u; + // these resources are data related to chunks of a whole mainObject + void resetDrawObjects() + { + resourcesCollection.drawObjects.vector.clear(); + resourcesCollection.indexBuffer.vector.clear(); + resourcesCollection.geometryInfo.vector.clear(); + } - // Invalidate all the clip projection addresses because geometry buffer got reset - for (auto& clipProjAddr : clipProjectionAddresses) - clipProjAddr = InvalidClipProjectionAddress; + void resetCustomProjections() + { + resourcesCollection.customProjections.vector.clear(); + + // Invalidate all the clip projection addresses because activeProjections buffer got reset + for (auto& addr : activeProjectionIndices) + addr = InvalidCustomProjectionIndex; } - void resetLineStyleCounters() + void resetCustomClipRects() { - 
currentLineStylesCount = 0u; - inMemLineStylesCount = 0u; + resourcesCollection.customClipRects.vector.clear(); + + // Invalidate all the custom clip rect indices because the customClipRects buffer got reset + for (auto& addr : activeClipRectIndices) + addr = InvalidCustomClipRectIndex; } - MainObject* getMainObject(uint32_t idx) + void resetLineStyles() { - MainObject* mainObjsArray = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()); - return &mainObjsArray[idx]; + resourcesCollection.lineStyles.vector.clear(); + activeLineStyleIndex = InvalidStyleIdx; } + void resetDTMSettings() + { + resourcesCollection.dtmSettings.vector.clear(); + activeDTMSettingsIndex = InvalidDTMSettingsIdx; + } + // MSDF Hashing and Caching Internal Functions enum class MSDFType : uint8_t { @@ -377,75 +781,90 @@ struct DrawResourcesFiller }; struct MSDFInputInfoHash { std::size_t operator()(const MSDFInputInfo& info) const { return info.lookupHash; } }; - + struct MSDFReference { uint32_t alloc_idx; - uint64_t lastUsedSemaphoreValue; + uint64_t lastUsedFrameIndex; - MSDFReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedSemaphoreValue(semaphoreVal) {} - MSDFReference(uint64_t semaphoreVal) : MSDFReference(InvalidTextureIdx, semaphoreVal) {} - MSDFReference() : MSDFReference(InvalidTextureIdx, ~0ull) {} + MSDFReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedFrameIndex(semaphoreVal) {} + MSDFReference(uint64_t currentFrameIndex) : MSDFReference(InvalidTextureIndex, currentFrameIndex) {} + MSDFReference() : MSDFReference(InvalidTextureIndex, ~0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value to MSDFReference without changing `alloc_idx` - inline MSDFReference& operator=(uint64_t semamphoreVal) { lastUsedSemaphoreValue = semamphoreVal; return *this; } + inline MSDFReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return 
*this; } }; - uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, SIntendedSubmitInfo& intendedNextSubmit) - { - uint32_t textureIdx = InvalidTextureIdx; - MSDFReference* tRef = msdfLRUCache->get(msdfInfo); - if (tRef) - { - textureIdx = tRef->alloc_idx; - tRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit - } - return textureIdx; - } + uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); - // ! mainObjIdx: make sure to pass your mainObjIdx to it if you want it to stay synced/updated if some overflow submit occured which would potentially erase what your mainObject points at. - // If you haven't created a mainObject yet, then pass InvalidMainObjectIdx - uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + // Flushes Current Draw Call and adds to drawCalls + void flushDrawObjects(); + + // Logger + nbl::system::logger_opt_smart_ptr m_logger = nullptr; + + // FrameIndex used as a criteria for resource/image eviction in case of limitations + uint32_t currentFrameIndex = 0u; + + // Replay Cache override + ReplayCache* currentReplayCache = nullptr; + + // DrawCalls Data + uint64_t drawObjectsFlushedToDrawCalls = 0ull; + std::vector drawCalls; // either dtms or objects + + // ResourcesCollection and packed into GPUBuffer + ResourcesCollection resourcesCollection; + IDeviceMemoryAllocator::SAllocation buffersMemoryArena; + nbl::core::smart_refctd_ptr resourcesGPUBuffer; + size_t copiedResourcesSize; + + // GPUImages Memory Arena + AddressAllocator + IDeviceMemoryAllocator::SAllocation imagesMemoryArena; + smart_refctd_ptr imagesMemorySubAllocator; + // Members 
smart_refctd_ptr m_utilities; IQueue* m_copyQueue; - uint32_t maxIndexCount; - - uint32_t inMemMainObjectCount = 0u; - uint32_t currentMainObjectCount = 0u; - uint32_t maxMainObjects = 0u; - - uint32_t inMemDrawObjectCount = 0u; - uint32_t currentDrawObjectCount = 0u; - uint32_t maxDrawObjects = 0u; + // Active Resources we need to keep track of and push to resources buffer if needed. + LineStyleInfo activeLineStyle; + uint32_t activeLineStyleIndex = InvalidStyleIdx; - uint64_t inMemGeometryBufferSize = 0u; - uint64_t currentGeometryBufferSize = 0u; - uint64_t maxGeometryBufferSize = 0u; + DTMSettingsInfo activeDTMSettings; + uint32_t activeDTMSettingsIndex = InvalidDTMSettingsIdx; - uint32_t inMemLineStylesCount = 0u; - uint32_t currentLineStylesCount = 0u; - uint32_t maxLineStyles = 0u; + MainObjectType activeMainObjectType; + TransformationType activeMainObjectTransformationType; - uint64_t geometryBufferAddress = 0u; // Actual BDA offset 0 of the gpu buffer + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; - std::deque clipProjections; // stack of clip projectios stored so we can resubmit them if geometry buffer got reset. - std::deque clipProjectionAddresses; // stack of clip projection gpu addresses in geometry buffer. to keep track of them in push/pops + // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order + std::deque activeProjections; // stack of projections stored so we can resubmit them if geometry buffer got reset. + std::deque activeProjectionIndices; // stack of projection gpu addresses in geometry buffer. to keep track of them in push/pops + + std::deque activeClipRects; // stack of clips stored so we can resubmit them if geometry buffer got reset. + std::deque activeClipRectIndices; // stack of clips gpu addresses in geometry buffer. 
to keep track of them in push/pops - // MSDF GetGlyphMSDFTextureFunc getGlyphMSDF; GetHatchFillPatternMSDFTextureFunc getHatchFillPatternMSDF; - using MSDFsLRUCache = core::LRUCache; + using MSDFsLRUCache = core::ResizableLRUCache; smart_refctd_ptr msdfTextureArray; // view to the resource holding all the msdfs in it's layers smart_refctd_ptr msdfTextureArrayIndexAllocator; - std::set msdfTextureArrayIndicesUsed = {}; // indices in the msdf texture array allocator that have been used in the current frame // TODO: make this a dynamic bitset - std::vector msdfTextureCopies = {}; // queued up texture copies std::unique_ptr msdfLRUCache; // LRU Cache to evict Least Recently Used in case of overflow - static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; + std::vector msdfImagesState = {}; // cached cpu imaged + their status, size equals to LRUCache size + static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; bool m_hasInitializedMSDFTextureArrays = false; + + // Images: + std::unique_ptr imagesCache; + smart_refctd_ptr suballocatedDescriptorSet; + uint32_t imagesArrayBinding = 0u; + + std::unordered_map> streamedImageCopies; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h new file mode 100644 index 000000000..a341eadd6 --- /dev/null +++ b/62_CAD/Images.h @@ -0,0 +1,219 @@ +#pragma once +using namespace nbl; +using namespace nbl::video; +using namespace nbl::core; +using namespace nbl::asset; + +using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. 
+ +enum class ImageState : uint8_t +{ + INVALID = 0, + CREATED_AND_MEMORY_BOUND, // GPU image created, not bound to descriptor set yet + BOUND_TO_DESCRIPTOR_SET, // Bound to descriptor set, GPU resident, but may contain uninitialized or partial data + GPU_RESIDENT_WITH_VALID_STATIC_DATA, // When data for static images gets issued for upload successfully +}; + +enum class ImageType : uint8_t +{ + INVALID = 0, + STATIC, // Regular non-georeferenced image, fully loaded once + GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view + GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant +}; + +struct GeoreferencedImageParams +{ + OrientedBoundingBox2D worldspaceOBB = {}; + uint32_t2 imageExtents = {}; + uint32_t2 viewportExtents = {}; + asset::E_FORMAT format = {}; + // TODO: Need to add other stuff later. +}; + +/** + * @class ImagesMemorySubAllocator + * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. + * + * This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation + * for image resources within a contiguous block of GPU memory. + * + * @note This class only manages address offsets. The actual memory must be bound separately. 
+ */ +class ImagesMemorySubAllocator : public core::IReferenceCounted +{ +public: + using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; + using ReservedAllocator = nbl::core::allocator; + static constexpr uint64_t InvalidAddress = AddressAllocator::invalid_address; + static constexpr uint64_t MaxMemoryAlignment = 4096u; // safe choice based on hardware reports + static constexpr uint64_t MinAllocSize = 128 * 1024u; // 128KB, the larger this is the better + + ImagesMemorySubAllocator(uint64_t memoryArenaSize) + { + m_reservedAllocSize = AddressAllocator::reserved_size(MaxMemoryAlignment, memoryArenaSize, MinAllocSize); + m_reservedAllocator = std::unique_ptr(new ReservedAllocator()); + m_reservedAlloc = m_reservedAllocator->allocate(m_reservedAllocSize, _NBL_SIMD_ALIGNMENT); + m_addressAllocator = std::unique_ptr(new AddressAllocator( + m_reservedAlloc, 0u, 0u, MaxMemoryAlignment, memoryArenaSize, MinAllocSize + )); + } + + // return offset, will return InvalidAddress if failed + uint64_t allocate(uint64_t size, uint64_t alignment) + { + return m_addressAllocator->alloc_addr(size, alignment); + } + + void deallocate(uint64_t addr, uint64_t size) + { + m_addressAllocator->free_addr(addr, size); + } + + ~ImagesMemorySubAllocator() + { + if (m_reservedAlloc) + m_reservedAllocator->deallocate(reinterpret_cast(m_reservedAlloc), m_reservedAllocSize); + } + +private: + std::unique_ptr m_addressAllocator = nullptr; + + // Memory Allocation Required for the AddressAllocator + std::unique_ptr m_reservedAllocator = nullptr; + void* m_reservedAlloc = nullptr; + size_t m_reservedAllocSize = 0; + +}; + +// This will be dropped when the descriptor gets dropped from SuballocatedDescriptorSet. +// Destructor will then deallocate from GeneralPurposeAllocator, making the previously allocated range of the image available/free again. 
+struct ImageCleanup : public core::IReferenceCounted +{ + ImageCleanup() + : imagesMemorySuballocator(nullptr) + , addr(ImagesMemorySubAllocator::InvalidAddress) + , size(0ull) + {} + + ~ImageCleanup() override + { + // printf(std::format("Actual Eviction size={}, offset={} \n", size, addr).c_str()); + if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) + imagesMemorySuballocator->deallocate(addr, size); + } + + smart_refctd_ptr imagesMemorySuballocator; + uint64_t addr; + uint64_t size; + +}; + +struct CachedImageRecord +{ + static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; + + uint32_t arrayIndex = InvalidTextureIndex; // index in our array of textures binding + ImageType type = ImageType::INVALID; + ImageState state = ImageState::INVALID; + uint64_t lastUsedFrameIndex = 0ull; // frame index at which this image was last used + uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; + uint64_t allocationSize = 0ull; + core::smart_refctd_ptr gpuImageView = nullptr; + core::smart_refctd_ptr staticCPUImage = nullptr; // cached cpu image for uploading to gpuImageView when needed. + + // In LRU Cache `insert` function, in case of cache miss, we need to construct the record with the current frame index + CachedImageRecord(uint64_t currentFrameIndex) + : arrayIndex(InvalidTextureIndex) + , type(ImageType::INVALID) + , state(ImageState::INVALID) + , lastUsedFrameIndex(currentFrameIndex) + , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) + , allocationSize(0ull) + , gpuImageView(nullptr) + , staticCPUImage(nullptr) + {} + + CachedImageRecord() + : CachedImageRecord(0ull) + {} + + // In LRU Cache `insert` function, in case of cache hit, we need to assign the frame index without changing `arrayIndex` + inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } +}; + +// A resource-aware image cache with an LRU eviction policy. 
+// This cache tracks image usage by ID and provides hooks for eviction logic (such as releasing descriptor slots and deallocating GPU memory, which is done by the user of this class) +// Currently, eviction is purely LRU-based. In the future, eviction decisions may incorporate additional factors: +// - memory usage per image. +// - lastUsedFrameIndex. +// This class helps coordinate images' lifetimes in sync with GPU usage via eviction callbacks. +class ImagesCache : public core::ResizableLRUCache +{ +public: + using base_t = core::ResizableLRUCache; + + ImagesCache(size_t capacity) + : base_t(capacity) + {} + + // Attempts to insert a new image into the cache. + // If the cache is full, invokes the provided `evictCallback` to evict an image. + // Returns a pointer to the inserted or existing CachedImageRecord. + template EvictionCallback> + inline CachedImageRecord* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) + { + return base_t::insert(imageID, lastUsedSema, evictCallback); + } + + // Retrieves the image associated with `imageID`, updating its LRU position. + inline CachedImageRecord* get(image_id imageID) + { + return base_t::get(imageID); + } + + // Retrieves the CachedImageRecord without updating LRU order. + inline CachedImageRecord* peek(image_id imageID) + { + return base_t::peek(imageID); + } + + inline size_t size() const { return base_t::size(); } + + // Selects an eviction candidate based on LRU policy. + // In the future, this could factor in memory pressure or semaphore sync requirements. + inline image_id select_eviction_candidate() + { + const image_id* lru = base_t::get_least_recently_used(); + if (lru) + return *lru; + else + { + // we shouldn't select eviction candidate if lruCache is empty + _NBL_DEBUG_BREAK_IF(true); + return ~0ull; + } + } + + // Removes a specific image from the cache (manual eviction). 
+ inline void erase(image_id imageID) + { + base_t::erase(imageID); + } +}; + +struct StreamedImageCopy +{ + asset::E_FORMAT srcFormat; + core::smart_refctd_ptr srcBuffer; // Make it 'std::future' later? + asset::IImage::SBufferCopy region; +}; + +// TODO: Rename to StaticImageAvailabilityRequest? +struct StaticImageInfo +{ + image_id imageID = ~0ull; + core::smart_refctd_ptr cpuImage = nullptr; + bool forceUpdate = false; // If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + asset::E_FORMAT imageViewFormatOverride = asset::E_FORMAT::EF_COUNT; // if asset::E_FORMAT::EF_COUNT then image view will have the same format as `cpuImage` +}; diff --git a/62_CAD/Polyline.h b/62_CAD/Polyline.h index 03b2f2c30..bee5650c7 100644 --- a/62_CAD/Polyline.h +++ b/62_CAD/Polyline.h @@ -66,8 +66,6 @@ struct LineStyleInfo rigidSegmentIdx = InvalidRigidSegmentIndex; phaseShift = 0.0f; - assert(stipplePatternUnnormalizedRepresentation.size() <= StipplePatternMaxSize); - if (stipplePatternUnnormalizedRepresentation.size() == 0) { stipplePatternSize = 0; @@ -110,6 +108,8 @@ struct LineStyleInfo stipplePatternTransformed[0] += stipplePatternTransformed[stipplePatternTransformed.size() - 1]; stipplePatternTransformed.pop_back(); } + + assert(stipplePatternTransformed.size() <= StipplePatternMaxSize); if (stipplePatternTransformed.size() != 1) { diff --git a/62_CAD/SingleLineText.cpp b/62_CAD/SingleLineText.cpp index 4b41cb628..ea755a2df 100644 --- a/62_CAD/SingleLineText.cpp +++ b/62_CAD/SingleLineText.cpp @@ -63,8 +63,8 @@ void SingleLineText::Draw( lineStyle.color = color; lineStyle.screenSpaceLineWidth = tan(tiltTiltAngle); lineStyle.worldSpaceLineWidth = boldInPixels; - const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - auto glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, 
intendedNextSubmit); + drawResourcesFiller.setActiveLineStyle(lineStyle); + drawResourcesFiller.beginMainObject(MainObjectType::TEXT); for (const auto& glyphBox : m_glyphBoxes) { @@ -75,7 +75,8 @@ void SingleLineText::Draw( // float32_t3 xx = float64_t3(0.0, -glyphBox.size.y, 0.0); const float32_t aspectRatio = static_cast(glm::length(dirV) / glm::length(dirU)); // check if you can just do: (glyphBox.size.y * scale.y) / glyphBox.size.x * scale.x) const float32_t2 minUV = face->getUV(float32_t2(0.0f,0.0f), glyphBox.size, drawResourcesFiller.getMSDFResolution(), MSDFPixelRange); - drawResourcesFiller.drawFontGlyph(face, glyphBox.glyphIdx, topLeft, dirU, aspectRatio, minUV, glyphObjectIdx, intendedNextSubmit); + drawResourcesFiller.drawFontGlyph(face, glyphBox.glyphIdx, topLeft, dirU, aspectRatio, minUV, intendedNextSubmit); } + drawResourcesFiller.endMainObject(); } \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 637c88eda..5cb4082bd 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -45,6 +45,7 @@ static constexpr bool DebugModeWireframe = false; static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; +static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skipping CPU Logic enum class ExampleMode { @@ -57,6 +58,9 @@ enum class ExampleMode CASE_6, // Custom Clip Projections CASE_7, // Images CASE_8, // MSDF and Text + CASE_9, // DTM + CASE_BUG, // Bug Repro, after fix, rename to CASE_10 and comment should be: testing fixed geometry and emulated fp64 corner cases + CASE_11, // grid DTM CASE_COUNT }; @@ -71,9 +75,12 @@ constexpr std::array cameraExtents = 10.0, // CASE_6 10.0, // CASE_7 600.0, // CASE_8 + 600.0, // CASE_9 + 10.0, // CASE_BUG + 1000.0 // CASE_11 }; -constexpr 
ExampleMode mode = ExampleMode::CASE_11; class Camera2D { @@ -236,7 +243,7 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources std::fill(m_framebuffers.begin(),m_framebuffers.end(),nullptr); } - // For creating extra per-image or swapchain resources you might need + // For creating extra per-image or swapchain resourcesCollection you might need virtual inline bool onCreateSwapchain_impl(const uint8_t qFam) { auto device = const_cast(m_renderpass->getOriginDevice()); @@ -270,6 +277,87 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources std::array,ISwapchain::MaxImages> m_framebuffers; }; + +// TODO: Move this funcitons that help with creating a new promoted CPUImage +template +struct PromotionComponentSwizzle +{ + template + void operator()(const InT* in, OutT* out) const + { + using in_t = std::conditional_t, uint64_t, InT>; + using out_t = std::conditional_t, uint64_t, OutT>; + + reinterpret_cast(out)[0u] = reinterpret_cast(in)[0u]; + + if constexpr (SRC_CHANNELS > 1) + reinterpret_cast(out)[1u] = reinterpret_cast(in)[1u]; + else + reinterpret_cast(out)[1u] = static_cast(0); + + if constexpr (SRC_CHANNELS > 2) + reinterpret_cast(out)[2u] = reinterpret_cast(in)[2u]; + else + reinterpret_cast(out)[2u] = static_cast(0); + + if constexpr (SRC_CHANNELS > 3) + reinterpret_cast(out)[3u] = reinterpret_cast(in)[3u]; + else + reinterpret_cast(out)[3u] = static_cast(1); + } +}; +template +bool performCopyUsingImageFilter( + const core::smart_refctd_ptr& inCPUImage, + const core::smart_refctd_ptr& outCPUImage) +{ + Filter filter; + + const uint32_t mipLevels = inCPUImage->getCreationParameters().mipLevels; + + for (uint32_t level = 0u; level < mipLevels; ++level) + { + const auto regions = inCPUImage->getRegions(level); + + for (auto& region : regions) + { + typename Filter::state_type state = {}; + state.extent = region.imageExtent; + state.layerCount = region.imageSubresource.layerCount; + state.inImage = 
inCPUImage.get(); + state.outImage = outCPUImage.get(); + state.inOffsetBaseLayer = core::vectorSIMDu32(region.imageOffset.x, region.imageOffset.y, region.imageOffset.z, region.imageSubresource.baseArrayLayer); + state.outOffsetBaseLayer = core::vectorSIMDu32(0u); + state.inMipLevel = region.imageSubresource.mipLevel; + state.outMipLevel = region.imageSubresource.mipLevel; + + if (!filter.execute(core::execution::par_unseq, &state)) + return false; + } + } + return true; +} + +bool performImageFormatPromotionCopy(const core::smart_refctd_ptr& inCPUImage, const core::smart_refctd_ptr& outCPUImage) +{ + asset::E_FORMAT srcImageFormat = inCPUImage->getCreationParameters().format; + asset::E_FORMAT dstImageFormat = outCPUImage->getCreationParameters().format; + + // In = srcData, Out = stagingBuffer + if (srcImageFormat == dstImageFormat) + return false; + + auto srcChannelCount = asset::getFormatChannelCount(srcImageFormat); + if (srcChannelCount == 1u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else if (srcChannelCount == 2u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else if (srcChannelCount == 3u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); +} + class ComputerAidedDesign final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication { using device_base_t = examples::SimpleWindowedApplication; @@ -282,22 +370,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu constexpr static uint32_t MaxSubmitsInFlight = 16u; public: - void allocateResources(uint32_t maxObjects) + void allocateResources() { - drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue()); - - // TODO: move individual allocations to DrawResourcesFiller::allocateResources(memory) - // Issue warning error, if we can't store our 
largest geomm struct + clip proj data inside geometry buffer along linestyle and mainObject - uint32_t maxIndices = maxObjects * 6u * 2u; - drawResourcesFiller.allocateIndexBuffer(m_device.get(), maxIndices); - drawResourcesFiller.allocateMainObjectsBuffer(m_device.get(), maxObjects); - drawResourcesFiller.allocateDrawObjectsBuffer(m_device.get(), maxObjects * 5u); - drawResourcesFiller.allocateStylesBuffer(m_device.get(), 512u); - - // * 3 because I just assume there is on average 3x beziers per actual object (cause we approximate other curves/arcs with beziers now) - // + 128 ClipProjData - size_t geometryBufferSize = maxObjects * sizeof(QuadraticBezierInfo) * 3 + 128 * sizeof(ClipProjectionData); - drawResourcesFiller.allocateGeometryBuffer(m_device.get(), geometryBufferSize); + drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); + + size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize); drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); { @@ -311,14 +390,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto globalsBufferMem = m_device->allocate(memReq, m_globalsBuffer.get()); } - size_t sumBufferSizes = - drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.geometryBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.indexBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer->getSize(); - m_logger->log("Buffers Size = %.2fKB", ILogger::E_LOG_LEVEL::ELL_INFO, sumBufferSizes / 1024.0f); - // pseudoStencil { asset::E_FORMAT pseudoStencilFormat = asset::EF_R32_UINT; @@ -399,22 
+470,44 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - IGPUSampler::SParams samplerParams = {}; - samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_OPAQUE_WHITE; // positive means outside shape - samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; - samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; - samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; - samplerParams.AnisotropicFilter = 3; - samplerParams.CompareEnable = false; - samplerParams.CompareFunc = ECO_GREATER; - samplerParams.LodBias = 0.f; - samplerParams.MinLod = -1000.f; - samplerParams.MaxLod = 1000.f; - msdfTextureSampler = m_device->createSampler(samplerParams); - + // MSDF Image Sampler + { + IGPUSampler::SParams samplerParams = {}; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_OPAQUE_WHITE; // positive means outside shape + samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; + samplerParams.AnisotropicFilter = 3; + samplerParams.CompareEnable = false; + samplerParams.CompareFunc = ECO_GREATER; + samplerParams.LodBias = 0.f; + samplerParams.MinLod = -1000.f; + samplerParams.MaxLod = 1000.f; + msdfImageSampler = m_device->createSampler(samplerParams); + } + + // Static Image Sampler + { + IGPUSampler::SParams samplerParams = {}; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapV = 
IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_TRANSPARENT_BLACK; + samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; + samplerParams.AnisotropicFilter = 3; + samplerParams.CompareEnable = false; + samplerParams.CompareFunc = ECO_GREATER; + samplerParams.LodBias = 0.f; + samplerParams.MinLod = -1000.f; + samplerParams.MaxLod = 1000.f; + staticImageSampler = m_device->createSampler(samplerParams); + } + // Initial Pipeline Transitions and Clearing of PseudoStencil and ColorStorage // Recorded to Temporary CommandBuffer, Submitted to Graphics Queue, and Blocked on here { @@ -640,7 +733,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu double dt = 0; double m_timeElapsed = 0.0; std::chrono::steady_clock::time_point lastTime; - uint32_t m_hatchDebugStep = 0u; + uint32_t m_hatchDebugStep = 10u; + E_HEIGHT_SHADING_MODE m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; inline bool onAppInitialized(smart_refctd_ptr&& system) override { @@ -670,7 +764,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if (!m_surface->init(getGraphicsQueue(),std::move(scResources),{})) return logFail("Could not initialize the Surface!"); - allocateResources(1024 * 1024u); + allocateResources(); const bitflag bindlessTextureFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT | @@ -678,6 +772,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; // Create DescriptorSetLayout, PipelineLayout and update DescriptorSets + const uint32_t imagesBinding = 3u; { video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = { { @@ -689,45 
+784,24 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }, { .binding = 1u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 2u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 3u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 4u, .type = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 5u, + .binding = 2u, .type = asset::IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 6u, + .binding = imagesBinding, .type = asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = bindlessTextureFlags, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 128u, + .count = ImagesBindingArraySize, }, }; descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0); @@ -767,7 +841,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { descriptorSet0 = descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout0)); descriptorSet1 = 
descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout1)); - constexpr uint32_t DescriptorCountSet0 = 6u; + constexpr uint32_t DescriptorCountSet0 = 3u; video::IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[DescriptorCountSet0] = {}; // Descriptors For Set 0: @@ -775,27 +849,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu descriptorInfosSet0[0u].info.buffer.size = m_globalsBuffer->getCreationParams().size; descriptorInfosSet0[0u].desc = m_globalsBuffer; - descriptorInfosSet0[1u].info.buffer.offset = 0u; - descriptorInfosSet0[1u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer->getCreationParams().size; - descriptorInfosSet0[1u].desc = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer; - - descriptorInfosSet0[2u].info.buffer.offset = 0u; - descriptorInfosSet0[2u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer->getCreationParams().size; - descriptorInfosSet0[2u].desc = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer; - - descriptorInfosSet0[3u].info.buffer.offset = 0u; - descriptorInfosSet0[3u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer->getCreationParams().size; - descriptorInfosSet0[3u].desc = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer; - - descriptorInfosSet0[4u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfosSet0[4u].info.combinedImageSampler.sampler = msdfTextureSampler; - descriptorInfosSet0[4u].desc = drawResourcesFiller.getMSDFsTextureArray(); + descriptorInfosSet0[1u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfosSet0[1u].info.combinedImageSampler.sampler = msdfImageSampler; + descriptorInfosSet0[1u].desc = drawResourcesFiller.getMSDFsTextureArray(); - descriptorInfosSet0[5u].desc = msdfTextureSampler; // TODO[Erfan]: different sampler and make immutable? 
+ descriptorInfosSet0[2u].desc = staticImageSampler; // TODO[Erfan]: different sampler and make immutable? // This is bindless to we write to it later. - // descriptorInfosSet0[6u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - // descriptorInfosSet0[6u].desc = drawResourcesFiller.getMSDFsTextureArray(); + // descriptorInfosSet0[3u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + // descriptorInfosSet0[3u].desc = drawResourcesFiller.getMSDFsTextureArray(); // Descriptors For Set 1: constexpr uint32_t DescriptorCountSet1 = 2u; @@ -812,62 +874,54 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[DescriptorUpdatesCount] = {}; // Set 0 Updates: + // globals descriptorUpdates[0u].dstSet = descriptorSet0.get(); descriptorUpdates[0u].binding = 0u; descriptorUpdates[0u].arrayElement = 0u; descriptorUpdates[0u].count = 1u; descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; + // mdfs textures descriptorUpdates[1u].dstSet = descriptorSet0.get(); descriptorUpdates[1u].binding = 1u; descriptorUpdates[1u].arrayElement = 0u; descriptorUpdates[1u].count = 1u; descriptorUpdates[1u].info = &descriptorInfosSet0[1u]; - + + // general texture sampler descriptorUpdates[2u].dstSet = descriptorSet0.get(); descriptorUpdates[2u].binding = 2u; descriptorUpdates[2u].arrayElement = 0u; descriptorUpdates[2u].count = 1u; descriptorUpdates[2u].info = &descriptorInfosSet0[2u]; - descriptorUpdates[3u].dstSet = descriptorSet0.get(); - descriptorUpdates[3u].binding = 3u; + // Set 1 Updates: + descriptorUpdates[3u].dstSet = descriptorSet1.get(); + descriptorUpdates[3u].binding = 0u; descriptorUpdates[3u].arrayElement = 0u; descriptorUpdates[3u].count = 1u; - descriptorUpdates[3u].info = &descriptorInfosSet0[3u]; - - descriptorUpdates[4u].dstSet = descriptorSet0.get(); - descriptorUpdates[4u].binding = 4u; + descriptorUpdates[3u].info = &descriptorInfosSet1[0u]; + + 
descriptorUpdates[4u].dstSet = descriptorSet1.get(); + descriptorUpdates[4u].binding = 1u; descriptorUpdates[4u].arrayElement = 0u; descriptorUpdates[4u].count = 1u; - descriptorUpdates[4u].info = &descriptorInfosSet0[4u]; - - descriptorUpdates[5u].dstSet = descriptorSet0.get(); - descriptorUpdates[5u].binding = 5u; - descriptorUpdates[5u].arrayElement = 0u; - descriptorUpdates[5u].count = 1u; - descriptorUpdates[5u].info = &descriptorInfosSet0[5u]; - - // Set 1 Updates: - descriptorUpdates[6u].dstSet = descriptorSet1.get(); - descriptorUpdates[6u].binding = 0u; - descriptorUpdates[6u].arrayElement = 0u; - descriptorUpdates[6u].count = 1u; - descriptorUpdates[6u].info = &descriptorInfosSet1[0u]; - - descriptorUpdates[7u].dstSet = descriptorSet1.get(); - descriptorUpdates[7u].binding = 1u; - descriptorUpdates[7u].arrayElement = 0u; - descriptorUpdates[7u].count = 1u; - descriptorUpdates[7u].info = &descriptorInfosSet1[1u]; - + descriptorUpdates[4u].info = &descriptorInfosSet1[1u]; m_device->updateDescriptorSets(DescriptorUpdatesCount, descriptorUpdates, 0u, nullptr); } - pipelineLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(descriptorSetLayout0), core::smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); + const asset::SPushConstantRange range = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants) + }; + + pipelineLayout = m_device->createPipelineLayout({ &range,1 }, core::smart_refctd_ptr(descriptorSetLayout0), core::smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); } + drawResourcesFiller.setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr(descriptorSet0), imagesBinding); + smart_refctd_ptr mainPipelineFragmentShaders = {}; smart_refctd_ptr mainPipelineVertexShader = {}; std::array, 2u> geoTexturePipelineShaders = {}; @@ -925,14 +979,14 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto 
mainPipelineFragmentCpuShader = loadCompileShader("../shaders/main_pipeline/fragment.hlsl", IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY); auto mainPipelineVertexCpuShader = loadCompileShader("../shaders/main_pipeline/vertex_shader.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); - auto geoTexturePipelineVertCpuShader = loadCompileShader(GeoTextureRenderer::VertexShaderRelativePath, IShader::E_SHADER_STAGE::ESS_VERTEX); - auto geoTexturePipelineFragCpuShader = loadCompileShader(GeoTextureRenderer::FragmentShaderRelativePath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); + // auto geoTexturePipelineVertCpuShader = loadCompileShader(GeoTextureRenderer::VertexShaderRelativePath, IShader::E_SHADER_STAGE::ESS_VERTEX); + // auto geoTexturePipelineFragCpuShader = loadCompileShader(GeoTextureRenderer::FragmentShaderRelativePath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); mainPipelineFragmentCpuShader->setShaderStage(IShader::E_SHADER_STAGE::ESS_FRAGMENT); mainPipelineFragmentShaders = m_device->createShader({ mainPipelineFragmentCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); mainPipelineVertexShader = m_device->createShader({ mainPipelineVertexCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - geoTexturePipelineShaders[0] = m_device->createShader({ geoTexturePipelineVertCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - geoTexturePipelineShaders[1] = m_device->createShader({ geoTexturePipelineFragCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + // geoTexturePipelineShaders[0] = m_device->createShader({ geoTexturePipelineVertCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + // geoTexturePipelineShaders[1] = m_device->createShader({ geoTexturePipelineFragCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); core::smart_refctd_ptr shaderWriteCacheFile; { @@ -1069,7 +1123,7 @@ class ComputerAidedDesign final : public 
examples::SimpleWindowedApplication, pu ); m_geoTextureRenderer = std::unique_ptr(new GeoTextureRenderer(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger))); - m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); + // m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); // Create the Semaphores m_renderSemaphore = m_device->createSemaphore(0ull); @@ -1090,6 +1144,158 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos; m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0]; + // Load image + system::path m_loadCWD = ".."; + std::string imagePaths[] = + { + "../../media/color_space_test/R8G8B8_1.jpg", + "../../media/color_space_test/R8G8B8_1.png", + "../../media/color_space_test/R8G8B8A8_2.png", + "../../media/color_space_test/R8G8B8A8_1.png", + }; + + /** + * @param formatOverride override format of an image view, use special argument asset::E_FORMAT::EF_COUNT to don't override image view format and use one retrieved from the loaded image + */ + auto loadImage = [&](const std::string& imagePath) -> smart_refctd_ptr + { + constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); + const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); + auto bundle = m_assetMgr->getAsset(imagePath, loadParams); + auto contents = bundle.getContents(); + if (contents.empty()) + { + m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); + return nullptr; + } + + smart_refctd_ptr cpuImgView; + const auto& asset = contents[0]; + switch (asset->getAssetType()) + { + case IAsset::ET_IMAGE: 
+ { + auto image = smart_refctd_ptr_static_cast(asset); + auto& flags = image->getCreationParameters().flags; + // assert if asset is mutable + const_cast&>(flags) |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + const auto format = image->getCreationParameters().format; + + ICPUImageView::SCreationParams viewParams = { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(image), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + cpuImgView = ICPUImageView::create(std::move(viewParams)); + } break; + + case IAsset::ET_IMAGE_VIEW: + cpuImgView = smart_refctd_ptr_static_cast(asset); + break; + default: + m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto loadedCPUImage = cpuImgView->getCreationParameters().image; + const auto loadedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + // Promoting the image to a format GPU supports. 
(so that updateImageViaStagingBuffer doesn't have to handle that each frame if overflow-submit needs to happen) + auto promotedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + promotedCPUImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = promotedCPUImageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(promotedCPUImageCreationParams.usage) + }; + promotedCPUImageCreationParams.format = m_physicalDevice->promoteImageFormat(request,video::IGPUImage::TILING::OPTIMAL); + } + + if (loadedCPUImageCreationParams.format != promotedCPUImageCreationParams.format) + { + smart_refctd_ptr promotedCPUImage = ICPUImage::create(promotedCPUImageCreationParams); + core::rational bytesPerPixel = asset::getBytesPerPixel(promotedCPUImageCreationParams.format); + + const auto extent = loadedCPUImageCreationParams.extent; + const uint32_t mipLevels = loadedCPUImageCreationParams.mipLevels; + const uint32_t arrayLayers = loadedCPUImageCreationParams.arrayLayers; + + // Only supporting 1 mip, it's just for test.. 
+ const size_t byteSize = (bytesPerPixel * extent.width * extent.height * extent.depth * arrayLayers).getIntegerApprox(); // TODO: consider mips + ICPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = byteSize; + smart_refctd_ptr promotedCPUImageBuffer = ICPUBuffer::create(std::move(bufferCreationParams)); + + auto newRegions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = newRegions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; // TODO + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = arrayLayers; + region.bufferOffset = 0u; + region.bufferRowLength = 0u; + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = extent; + promotedCPUImage->setBufferAndRegions(std::move(promotedCPUImageBuffer), newRegions); + + performImageFormatPromotionCopy(loadedCPUImage, promotedCPUImage); + return promotedCPUImage; + } + else + { + return loadedCPUImage; + } + }; + + for (const auto& imagePath : imagePaths) + { + auto image = loadImage(imagePath); + if (image) + sampleImages.push_back(image); + } + + gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); + + // set diagonals of cells to TOP_LEFT_TO_BOTTOM_RIGHT or BOTTOM_LEFT_TO_TOP_RIGHT randomly + { + // assumption is that format of the grid DTM height map is *_SRGB, I don't think we need any code to ensure that + + auto* region = gridDTMHeightMap->getRegion(0, core::vectorSIMDu32(0.0f)); + auto imageExtent = region->getExtent(); + auto imagePixelSize = asset::getBytesPerPixel(gridDTMHeightMap->getCreationParameters().format).getIntegerApprox(); + float* imageData = static_cast(gridDTMHeightMap->getBuffer()->getPointer()) + region->bufferOffset; + const size_t imageByteSize = gridDTMHeightMap->getImageDataSizeInBytes(); + assert(imageByteSize % sizeof(float) == 0); + + std::random_device rd; + 
std::mt19937 mt(rd()); + std::uniform_int_distribution dist(0, 1); + + for (int i = 0; i < imageByteSize; i += sizeof(float)) + { + const bool isTexelEven = static_cast(dist(mt)); + E_CELL_DIAGONAL diagonal = isTexelEven ? TOP_LEFT_TO_BOTTOM_RIGHT : BOTTOM_LEFT_TO_TOP_RIGHT; + + setDiagonalModeBit(imageData, diagonal); + imageData++; + } + + } + + assert(gridDTMHeightMap); + return true; } @@ -1129,10 +1335,30 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { m_hatchDebugStep--; } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_1) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_2) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_3) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + } } } , m_logger.get()); + const bool isCachingDraw = CacheAndReplay && m_realFrameIx == 0u; + if (isCachingDraw) + { + SIntendedSubmitInfo invalidSubmit = {}; + addObjects(invalidSubmit); // if any overflows happen here, it will add to our replay cache and not submit anything + replayCaches.push_back(drawResourcesFiller.createReplayCache()); + finishedCachingDraw = true; + } if (!beginFrameRender()) return; @@ -1153,10 +1379,28 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu IQueue::SSubmitInfo::SSemaphoreInfo waitSems[2u] = { acquired, prevFrameRendered }; m_intendedNextSubmit.waitSemaphores = waitSems; - addObjects(m_intendedNextSubmit); - + if (CacheAndReplay) + { + // to size-1u because we only want to submit overflows here. 
+ for (uint32_t i = 0u; i < replayCaches.size() - 1u; ++i) + { + drawResourcesFiller.setReplayCache(replayCaches[i].get()); + submitDraws(m_intendedNextSubmit, true); + drawResourcesFiller.unsetReplayCache(); + } + if (!replayCaches.empty()) + drawResourcesFiller.setReplayCache(replayCaches.back().get()); + } + else + { + addObjects(m_intendedNextSubmit); + } + endFrameRender(m_intendedNextSubmit); + if (CacheAndReplay) + drawResourcesFiller.unsetReplayCache(); + #ifdef BENCHMARK_TILL_FIRST_FRAME if (!stopBenchamrkFlag) { @@ -1201,23 +1445,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // cb->reset(video::IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); // cb->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cb->beginDebugMarker("Frame"); - - float64_t3x3 projectionToNDC; - projectionToNDC = m_Camera.constructViewProjection(); - - Globals globalData = {}; - globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; - globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; - globalData.defaultClipProjection.projectionToNDC = projectionToNDC; - globalData.defaultClipProjection.minClipNDC = float32_t2(-1.0, -1.0); - globalData.defaultClipProjection.maxClipNDC = float32_t2(+1.0, +1.0); - auto screenToWorld = getScreenToWorldRatio(globalData.defaultClipProjection.projectionToNDC, globalData.resolution); - globalData.screenToWorldRatio = screenToWorld; - globalData.worldToScreenRatio = (1.0/screenToWorld); - globalData.miterLimit = 10.0f; - SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; - bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); - assert(updateSuccess); nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; auto scRes = static_cast(m_surface->getSwapchainResources()); @@ -1248,10 +1475,53 @@ class ComputerAidedDesign final : public 
examples::SimpleWindowedApplication, pu void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) { + const bool isCachingDraw = CacheAndReplay && m_realFrameIx == 0u && !finishedCachingDraw; + if (isCachingDraw) + { + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); + replayCaches.push_back(drawResourcesFiller.createReplayCache()); + return; // we don't record, submit or do anything, just caching the draw resources + } + + drawResourcesFiller.pushAllUploads(intendedSubmitInfo); + + m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; - auto&r = drawResourcesFiller; + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); + + float64_t3x3 projectionToNDC; + projectionToNDC = m_Camera.constructViewProjection(); + + Globals globalData = {}; + uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); + globalData.pointers = { + .lineStyles = baseAddress + resourcesCollection.lineStyles.bufferOffset, + .dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset, + .customProjections = baseAddress + resourcesCollection.customProjections.bufferOffset, + .customClipRects = baseAddress + resourcesCollection.customClipRects.bufferOffset, + .mainObjects = baseAddress + resourcesCollection.mainObjects.bufferOffset, + .drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, + }; + globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 
20.0f; + globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; + globalData.defaultProjectionToNDC = projectionToNDC; + float screenToWorld = getScreenToWorldRatio(globalData.defaultProjectionToNDC, globalData.resolution); + globalData.screenToWorldRatio = screenToWorld; + globalData.worldToScreenRatio = (1.0f/screenToWorld); + globalData.screenToWorldScaleTransform = float64_t3x3(globalData.worldToScreenRatio, 0.0f, 0.0f, + 0.0f, globalData.worldToScreenRatio, 0.0f, + 0.0f, 0.0f, 1.0f); + globalData.miterLimit = 10.0f; + globalData.currentlyActiveMainObjectIndex = drawResourcesFiller.getActiveMainObjectIndex(); + SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; + bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); + assert(updateSuccess); + asset::SViewport vp = { .x = 0u, @@ -1272,25 +1542,12 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // pipelineBarriersBeforeDraw { - constexpr uint32_t MaxBufferBarriersCount = 6u; + constexpr uint32_t MaxBufferBarriersCount = 2u; uint32_t bufferBarriersCount = 0u; IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t bufferBarriers[MaxBufferBarriersCount]; + + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); - // Index Buffer Copy Barrier -> Only do once at the beginning of the frames - if (m_realFrameIx == 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::INDEX_READ_BIT; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.gpuDrawBuffers.indexBuffer->getSize(), - .buffer = 
drawResourcesFiller.gpuDrawBuffers.indexBuffer, - }; - } if (m_globalsBuffer->getSize() > 0u) { auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; @@ -1305,60 +1562,18 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu .buffer = m_globalsBuffer, }; } - if (drawResourcesFiller.getCurrentDrawObjectsBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentDrawObjectsBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer, - }; - } - if (drawResourcesFiller.getCurrentGeometryBufferSize() > 0u) + if (drawResourcesFiller.getCopiedResourcesSize() > 0u) { auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; + bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS | PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS; bufferBarrier.range = { .offset = 0u, - .size = drawResourcesFiller.getCurrentGeometryBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.geometryBuffer, - }; - } - if (drawResourcesFiller.getCurrentMainObjectsBufferSize() > 0u) - { - auto& bufferBarrier = 
bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentMainObjectsBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer, - }; - } - if (drawResourcesFiller.getCurrentLineStylesBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentLineStylesBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer, + .size = drawResourcesFiller.getCopiedResourcesSize(), + .buffer = drawResourcesFiller.getResourcesGPUBuffer(), }; } cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .bufBarriers = {bufferBarriers, bufferBarriersCount}, .imgBarriers = {} }); @@ -1383,22 +1598,43 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }; } cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - const uint32_t currentIndexCount = drawResourcesFiller.getDrawObjectCount() * 6u; + IGPUDescriptorSet* descriptorSets[] = { descriptorSet0.get(), descriptorSet1.get() }; cb->bindDescriptorSets(asset::EPBP_GRAPHICS, pipelineLayout.get(), 0u, 2u, descriptorSets); + + cb->bindGraphicsPipeline(graphicsPipeline.get()); - // 
TODO[Przemek]: based on our call bind index buffer you uploaded to part of the `drawResourcesFiller.gpuDrawBuffers.geometryBuffer` - // Vertices will be pulled based on baseBDAPointer of where you uploaded the vertex + the VertexID in the vertex shader. - cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer.get() }, asset::EIT_32BIT); + for (auto& drawCall : drawResourcesFiller.getDrawCalls()) + { + if (drawCall.isDTMRendering) + { + cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); - // TODO[Przemek]: binding the same pipelie, no need to change. - cb->bindGraphicsPipeline(graphicsPipeline.get()); - - // TODO[Przemek]: contour settings, height shading settings, base bda pointers will need to be pushed via pushConstants before the draw currently as it's the easiest thing to do. + PushConstants pc = { + .triangleMeshVerticesBaseAddress = drawCall.dtm.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, + .triangleMeshMainObjectIndex = drawCall.dtm.triangleMeshMainObjectIndex, + .isDTMRendering = true + }; + cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + cb->drawIndexed(drawCall.dtm.indexCount, 1u, 0u, 0u, 0u); + } + else + { + PushConstants pc = { + .isDTMRendering = false + }; + cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + const uint64_t indexOffset = drawCall.drawObj.drawObjectStart * 6u; + const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; + + // assert(currentIndexCount == resourcesCollection.indexBuffer.getCount()); + cb->bindIndexBuffer({ .offset = 
resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); + } + } - // TODO[Przemek]: draw parameters needs to reflect the mesh involved - cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); if (fragmentShaderInterlockEnabled) { cb->bindGraphicsPipeline(resolveAlphaGraphicsPipeline.get()); @@ -1407,14 +1643,17 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if constexpr (DebugModeWireframe) { + const uint32_t indexCount = resourcesCollection.drawObjects.getCount() * 6u; cb->bindGraphicsPipeline(debugGraphicsPipeline.get()); - cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); + cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } - + cb->endRenderPass(); if (!inBetweenSubmit) cb->endDebugMarker(); + + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); if (inBetweenSubmit) { @@ -1480,6 +1719,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { auto retval = device_base_t::getRequiredDeviceFeatures(); retval.fragmentShaderPixelInterlock = FragmentShaderPixelInterlock; + retval.nullDescriptor = true; + return retval; + } + + virtual video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + video::SPhysicalDeviceLimits retval = base_t::getRequiredDeviceLimits(); + retval.fragmentShaderBarycentric = true; + return retval; } @@ -1489,32 +1737,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // We only support one swapchain mode, surface, the other one is Display which we have not implemented yet. 
retval.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE; retval.validations = true; - retval.synchronizationValidation = true; + retval.synchronizationValidation = false; return retval; } protected: void addObjects(SIntendedSubmitInfo& intendedNextSubmit) { - - // TODO[Przemek]: add your own case, you won't call any other drawResourcesFiller function, only drawMesh with your custom made Mesh (for start it can be a single triangle) - - // we record upload of our objects and if we failed to allocate we submit everything - if (!intendedNextSubmit.valid()) - { - // log("intendedNextSubmit is invalid.", nbl::system::ILogger::ELL_ERROR); - assert(false); - return; - } - - // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state - auto* cmdbuf = m_currentRecordingCommandBufferInfo->cmdbuf; - - assert(cmdbuf->getState() == video::IGPUCommandBuffer::STATE::RECORDING && cmdbuf->isResettable()); - assert(cmdbuf->getRecordingFlags().hasFlags(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)); - - auto* cmdpool = cmdbuf->getPool(); - drawResourcesFiller.setSubmitDrawsFunction( [&](SIntendedSubmitInfo& intendedNextSubmit) { @@ -1951,8 +2180,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu LineStyleInfo style = {}; style.screenSpaceLineWidth = 4.0f; - style.worldSpaceLineWidth = 0.0f; - style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.5f); + style.worldSpaceLineWidth = 2.0f; + style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.1f); LineStyleInfo style2 = {}; style2.screenSpaceLineWidth = 2.0f; @@ -2025,7 +2254,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu myCurve.majorAxis = { -10.0, 5.0 }; myCurve.center = { 0, -5.0 }; myCurve.angleBounds = { - nbl::core::PI() * 2.0, + nbl::core::PI() * 1.0, nbl::core::PI() * 0.0 }; myCurve.eccentricity = 1.0; @@ -2053,10 +2282,10 @@ class ComputerAidedDesign final : public 
examples::SimpleWindowedApplication, pu } drawResourcesFiller.drawPolyline(originalPolyline, style, intendedNextSubmit); - //CPolyline offsettedPolyline = originalPolyline.generateParallelPolyline(+0.0 - 3.0 * abs(cos(m_timeElapsed * 0.0009))); - //CPolyline offsettedPolyline2 = originalPolyline.generateParallelPolyline(+0.0 + 3.0 * abs(cos(m_timeElapsed * 0.0009))); - //drawResourcesFiller.drawPolyline(offsettedPolyline, style2, intendedNextSubmit); - //drawResourcesFiller.drawPolyline(offsettedPolyline2, style2, intendedNextSubmit); + CPolyline offsettedPolyline = originalPolyline.generateParallelPolyline(+0.0 - 3.0 * abs(cos(10.0 * 0.0009))); + CPolyline offsettedPolyline2 = originalPolyline.generateParallelPolyline(+0.0 + 3.0 * abs(cos(10.0 * 0.0009))); + drawResourcesFiller.drawPolyline(offsettedPolyline, style2, intendedNextSubmit); + drawResourcesFiller.drawPolyline(offsettedPolyline2, style2, intendedNextSubmit); } else if (mode == ExampleMode::CASE_4) { @@ -2768,16 +2997,20 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else if (mode == ExampleMode::CASE_6) { - // left half of screen should be red and right half should be green - const auto& cameraProj = m_Camera.constructViewProjection(); - ClipProjectionData showLeft = {}; - showLeft.projectionToNDC = cameraProj; - showLeft.minClipNDC = float32_t2(-1.0, -1.0); - showLeft.maxClipNDC = float32_t2(0.0, +1.0); - ClipProjectionData showRight = {}; - showRight.projectionToNDC = cameraProj; - showRight.minClipNDC = float32_t2(0.0, -1.0); - showRight.maxClipNDC = float32_t2(+1.0, +1.0); + float64_t3x3 customProjection = float64_t3x3{ + 1.0, 0.0, cos(m_timeElapsed * 0.0005) * 100.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0 + }; + + /// [NOTE]: We set minClip and maxClip (in default worldspace) in such a way that minClip.y > maxClip.y so that minClipNDC.y < maxClipNDC.y + // left half should be red and right half should be green + WorldClipRect showLeft = {}; + showLeft.minClip = 
float64_t2(-100.0, +1000.0); + showLeft.maxClip = float64_t2(0.0, -1000.0); + WorldClipRect showRight = {}; + showRight.minClip = float64_t2(0.0, +1000.0); + showRight.maxClip = float64_t2(100.0, -1000.0); LineStyleInfo leftLineStyle = {}; leftLineStyle.screenSpaceLineWidth = 3.0f; @@ -2832,181 +3065,55 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } // we do redundant and nested push/pops to test - drawResourcesFiller.pushClipProjectionData(showLeft); + drawResourcesFiller.pushCustomClipRect(showLeft); { drawResourcesFiller.drawPolyline(polyline1, leftLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showRight); + drawResourcesFiller.pushCustomClipRect(showRight); + drawResourcesFiller.pushCustomProjection(customProjection); { drawResourcesFiller.drawPolyline(polyline1, rightLineStyle, intendedNextSubmit); drawResourcesFiller.drawPolyline(polyline2, rightLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomProjection(); + drawResourcesFiller.popCustomClipRect(); drawResourcesFiller.drawPolyline(polyline2, leftLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showRight); + drawResourcesFiller.pushCustomClipRect(showRight); { drawResourcesFiller.drawPolyline(polyline3, rightLineStyle, intendedNextSubmit); drawResourcesFiller.drawPolyline(polyline2, rightLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showLeft); + drawResourcesFiller.pushCustomClipRect(showLeft); { drawResourcesFiller.drawPolyline(polyline1, leftLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); drawResourcesFiller.drawPolyline(polyline2, leftLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + 
drawResourcesFiller.popCustomClipRect(); } else if (mode == ExampleMode::CASE_7) { - if (m_realFrameIx == 0u) + for (uint32_t i = 0; i < sampleImages.size(); ++i) { - // Load image - system::path m_loadCWD = ".."; - std::string imagePath = "../../media/color_space_test/R8G8B8A8_1.png"; - - constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); - const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(),m_loadCWD); - auto bundle = m_assetMgr->getAsset(imagePath,loadParams); - auto contents = bundle.getContents(); - if (contents.empty()) - { - m_logger->log("Failed to load image with path %s, skipping!",ILogger::ELL_ERROR,(m_loadCWD/imagePath).c_str()); - } - - smart_refctd_ptr cpuImgView; - const auto& asset = contents[0]; - switch (asset->getAssetType()) - { - case IAsset::ET_IMAGE: - { - auto image = smart_refctd_ptr_static_cast(asset); - const auto format = image->getCreationParameters().format; - - ICPUImageView::SCreationParams viewParams = { - .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, - .image = std::move(image), - .viewType = IImageView::E_TYPE::ET_2D, - .format = format, - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - } - }; - - cpuImgView = ICPUImageView::create(std::move(viewParams)); - } break; - - case IAsset::ET_IMAGE_VIEW: - cpuImgView = smart_refctd_ptr_static_cast(asset); - break; - default: - m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!",ILogger::ELL_ERROR); - } - - - // create matching size gpu image - smart_refctd_ptr gpuImg; - const auto& origParams = cpuImgView->getCreationParameters(); - const auto origImage = origParams.image; - IGPUImage::SCreationParams imageParams = {}; - 
imageParams = origImage->getCreationParameters(); - imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; - // promote format because RGB8 and friends don't actually exist in HW - { - const IPhysicalDevice::SImageFormatPromotionRequest request = { - .originalFormat = imageParams.format, - .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) - }; - imageParams.format = m_physicalDevice->promoteImageFormat(request,imageParams.tiling); - } - gpuImg = m_device->createImage(std::move(imageParams)); - if (!gpuImg || !m_device->allocate(gpuImg->getMemoryReqs(),gpuImg.get()).isValid()) - m_logger->log("Failed to create or allocate gpu image!",ILogger::ELL_ERROR); - gpuImg->setObjectDebugName(imagePath.c_str()); - - IGPUImageView::SCreationParams viewParams = { - .image = gpuImg, - .viewType = IGPUImageView::ET_2D, - .format = gpuImg->getCreationParameters().format - }; - auto gpuImgView = m_device->createImageView(std::move(viewParams)); - - // Bind gpu image view to descriptor set - video::IGPUDescriptorSet::SDescriptorInfo dsInfo; - dsInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - dsInfo.desc = gpuImgView; - - IGPUDescriptorSet::SWriteDescriptorSet dsWrites[1u] = - { - { - .dstSet = descriptorSet0.get(), - .binding = 6u, - .arrayElement = 0u, - .count = 1u, - .info = &dsInfo, - } - }; - m_device->updateDescriptorSets(1u, dsWrites, 0u, nullptr); - - // Upload Loaded CPUImageData to GPU - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeCopyImageBarriers[] = - { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - } - // .ownershipOp. 
No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = origParams.subresourceRange, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - } - }; - - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - m_utils->updateImageViaStagingBuffer( - intendedNextSubmit, - origImage->getBuffer()->getPointer(), origImage->getCreationParameters().format, - gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - origImage->getRegions()); - - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t afterCopyImageBarriers[] = - { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = origParams.subresourceRange, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from + //printf(std::format("\n Image {} \n", i).c_str()); + drawResourcesFiller.ensureStaticImageAvailability({ imageID, sampleImages[i] }, intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { .topLeft = { 0.0 + (i) * 3.0, 0.0 }, .dirU = { 3.0 , 0.0 }, .aspectRatio = 1.0 }, intendedNextSubmit); + //printf("\n"); } - drawResourcesFiller._test_addImageObject({ 0.0, 0.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); - drawResourcesFiller._test_addImageObject({ 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); + + GeoreferencedImageParams geoRefParams = {}; + 
geoRefParams.format = asset::EF_R8G8B8A8_SRGB; + geoRefParams.imageExtents = uint32_t2 (2048, 2048); + geoRefParams.viewportExtents = (m_realFrameIx <= 5u) ? uint32_t2(1280, 720) : uint32_t2(3840, 2160); // to test trigerring resize/recreation + // drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(6996, geoRefParams, intendedNextSubmit); LineStyleInfo lineStyle = { @@ -3019,8 +3126,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { std::vector linePoints; linePoints.push_back({ 0.0, 0.0 }); - linePoints.push_back({ 100.0, 0.0 }); - linePoints.push_back({ 100.0, -100.0 }); + linePoints.push_back({ 1.0, 0.0 }); + linePoints.push_back({ 1.0, -1.0 }); polyline.addLinePoints(linePoints); } drawResourcesFiller.drawPolyline(polyline, lineStyle, intendedNextSubmit); @@ -3076,7 +3183,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, m_font.get(), float64_t2(0.0,-200.0), float32_t2(1.0, 1.0), rotation, float32_t4(1.0, 1.0, 1.0, 1.0), italicTiltAngle, 0.0f); singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, m_font.get(), float64_t2(0.0,-250.0), float32_t2(1.0, 1.0), rotation, float32_t4(1.0, 1.0, 1.0, 1.0), italicTiltAngle, 0.5f); // singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-200.0), float32_t2(1.0, 1.0), nbl::core::PI() * abs(cos(m_timeElapsed * 0.00005))); - // Smaller text to test mip maps + // Smaller text to test level maps //singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-130.0), float32_t2(0.4, 0.4), rotation); //singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-150.0), float32_t2(0.2, 0.2), rotation); } @@ -3090,15 +3197,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto penY = -500.0; auto previous = 0; - uint32_t glyphObjectIdx; - { - LineStyleInfo lineStyle = {}; - 
lineStyle.color = float32_t4(1.0, 1.0, 1.0, 1.0); - const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - - glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); - } - float64_t2 currentBaselineStart = float64_t2(0.0, 0.0); float64_t scale = 1.0 / 64.0; @@ -3231,7 +3329,375 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); + else if (mode == ExampleMode::CASE_9) + { + // GRID (outdated) + /*core::vector vertices = { + { float32_t2(-200.0f, -200.0f), 10.0f }, + { float32_t2(-50.0f, -200.0f), 50.0f }, + { float32_t2(100.0f, -200.0f), 90.0f }, + { float32_t2(-125.0f, -70.1f), 10.0f }, + { float32_t2(25.0f, -70.1f), 50.0f }, + { float32_t2(175.0f, -70.1f), 90.0f }, + { float32_t2(-200.0f, 59.8f), 10.0f }, + { float32_t2(-50.0f, 59.8f), 50.0f }, + { float32_t2(100.0f, 59.8f), 90.0f }, + { float32_t2(-125.0f, 189.7f), 10.0f }, + { float32_t2(25.0f, 189.7f), 50.0f }, + { float32_t2(175.0f, 189.7f), 90.0f } + }; + + core::vector indices = { + 0, 3, 1, + 1, 3, 4, + 1, 2, 4, + 2, 4, 5, + 3, 4, 6, + 4, 6, 7, + 4, 5, 7, + 5, 7, 8, + 6, 7, 9, + 7, 9, 10, + 7, 8, 10, + 8, 10, 11 + };*/ + + // PYRAMID + core::vector vertices = { + //{ float64_t2(0.0, 0.0), 100.0 }, //0 + //{ float64_t2(-200.0, -200.0), 10.0 }, //1 + //{ float64_t2(200.0, -200.0), 10.0 }, //2 + //{ float64_t2(200.0, 200.0), -20.0 }, //3 + //{ float64_t2(-200.0, 200.0), 10.0 }, //4 + + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(-200.0, -200.0), 10.0 }, + { float64_t2(200.0, -100.0), 10.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(200.0, -100.0), 10.0 }, + { float64_t2(200.0, 200.0), -20.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(200.0, 200.0), -20.0 }, + { float64_t2(-200.0, 200.0), 10.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(-200.0, 200.0), 10.0 }, + { float64_t2(-200.0, 
-200.0), 10.0 }, + }; + + core::vector indices = { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + 9, 10, 11 + }; + + // SINGLE TRIANGLE + /*core::vector vertices = { + { float64_t2(0.0, 0.0), -20.0 }, + { float64_t2(-200.0, -200.0), 100.0 }, + { float64_t2(200.0, -100.0), 80.0 }, + }; + + core::vector indices = { + 0, 1, 2 + };*/ + + CTriangleMesh mesh; + mesh.setVertices(std::move(vertices)); + mesh.setIndices(std::move(indices)); + + DTMSettingsInfo dtmInfo{}; + //dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode |= E_DTM_MODE::CONTOUR; + + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; + dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + + dtmInfo.contourSettingsCount = 2u; + dtmInfo.contourSettings[0u].startHeight = 20; + dtmInfo.contourSettings[0u].endHeight = 90; + dtmInfo.contourSettings[0u].heightInterval = 10; + dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; + dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); + + dtmInfo.contourSettings[1u] = dtmInfo.contourSettings[0u]; + dtmInfo.contourSettings[1u].startHeight += 5.0f; + dtmInfo.contourSettings[1u].heightInterval = 13.0f; + dtmInfo.contourSettings[1u].lineStyleInfo.color = float32_t4(0.8f, 0.4f, 0.3f, 1.0f); + + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + switch 
(m_shadingModeExample) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + + break; + } + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + 
dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + } + + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); + + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); + for (auto& v : mesh.m_vertices) + { + v.pos += float64_t2(450.0, 200.0); + v.height -= 10.0; + } + + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); + } + else if (mode == ExampleMode::CASE_BUG) + { + CPolyline polyline; + + LineStyleInfo style = {}; + style.screenSpaceLineWidth = 4.0f; + style.color = float32_t4(0.619f, 0.325f, 0.709f, 0.5f); + + for (uint32_t i = 0; i < 128u; ++i) + { + std::vector> quadBeziers; + curves::EllipticalArcInfo myCircle; + { + myCircle.majorAxis = { 0.05 , 0.0}; + myCircle.center = { 0.0 + i * 0.1, i * 0.1 }; + myCircle.angleBounds = { + nbl::core::PI() * 0.0, + nbl::core::PI() * 2.0 + }; + myCircle.eccentricity = 1.0; + } + + curves::Subdivision::AddBezierFunc addToBezier = [&](shapes::QuadraticBezier&& info) -> void + { + quadBeziers.push_back(info); + }; + + curves::Subdivision::adaptive(myCircle, 1e-5, addToBezier, 10u); + polyline.addQuadBeziers(quadBeziers); + // drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + polyline.clearEverything(); + } + + // Testing Fixed Geometry + { + float64_t2 line0[2u] = + { + float64_t2(-1.0, 0.0), + float64_t2(+1.0, 0.0), + }; + float64_t2 line1[3u] = + { + float64_t2(0.0, -1.0), + float64_t2(0.0, +1.0), + float64_t2(+1.0, +1.0), + }; + + float64_t3x3 translateMat = + { + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t angle = m_timeElapsed * 0.001; + float64_t2 dir = 
float64_t2{ cos(angle), sin(angle) }; + float64_t3x3 rotateMat = + { + dir.x, -dir.y, 0.0, + dir.y, dir.x, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t2 scale = float64_t2{ 100.0, 100.0 }; + float64_t3x3 scaleMat = + { + scale.x, 0.0, 0.0, + 0.0, scale.y, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t3x3 transformation = nbl::hlsl::mul(translateMat, nbl::hlsl::mul(rotateMat, scaleMat)); + polyline.addLinePoints(line0); + polyline.addLinePoints(line1); + polyline.preprocessPolylineWithStyle(style); + // drawResourcesFiller.drawPolyline(polyline, intendedNextSubmit); + drawResourcesFiller.drawFixedGeometryPolyline(polyline, style, transformation, TransformationType::TT_FIXED_SCREENSPACE_SIZE, intendedNextSubmit); + } + } + else if (mode == ExampleMode::CASE_11) + { + DTMSettingsInfo dtmInfo{}; + dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode |= E_DTM_MODE::CONTOUR; + + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 2.0f; + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; + dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + + dtmInfo.contourSettingsCount = 2u; + dtmInfo.contourSettings[0u].startHeight = 20; + dtmInfo.contourSettings[0u].endHeight = 90; + dtmInfo.contourSettings[0u].heightInterval = 10; + dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; + dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); + + dtmInfo.contourSettings[1u] = dtmInfo.contourSettings[0u]; + dtmInfo.contourSettings[1u].startHeight += 5.0f; + dtmInfo.contourSettings[1u].heightInterval = 13.0f; + 
dtmInfo.contourSettings[1u].lineStyleInfo.color = float32_t4(0.8f, 0.4f, 0.3f, 1.0f); + + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + animatedAlpha = 1.0f; + switch (m_shadingModeExample) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + + break; + } + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-20.0f, float32_t4(0.0f, 0.5f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 0.7f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + case 
E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + } + + constexpr float HeightMapCellWidth = 20.0f; + const auto heightMapExtent = gridDTMHeightMap->getCreationParameters().extent; + assert(heightMapExtent.width > 0 && heightMapExtent.height > 0); + + float64_t2 worldSpaceExtents; + const float64_t2 topLeft = { -400.0f, 400.0f }; + worldSpaceExtents.x = (heightMapExtent.width - 1) * HeightMapCellWidth; + worldSpaceExtents.y = (heightMapExtent.height - 1) * HeightMapCellWidth; + const uint64_t heightMapTextureID = 0ull; + + constexpr bool DrawGridOnly = false; + + if(DrawGridOnly) + { + dtmInfo.mode = E_DTM_MODE::OUTLINE; + drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit, DrawGridOnly); + } + else + { + StaticImageInfo heightMapStaticImageInfo = { + .imageID = heightMapTextureID, + .cpuImage = gridDTMHeightMap, + .forceUpdate = false, + .imageViewFormatOverride = asset::E_FORMAT::EF_R32G32B32A32_UINT // for now we use only R32G32B32A32_* anyway + }; + + if (!drawResourcesFiller.ensureStaticImageAvailability(heightMapStaticImageInfo, intendedNextSubmit)) + m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); + drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); + } + + // 
draw test polyline +#if 0 + { + LineStyleInfo style = {}; + style.screenSpaceLineWidth = 0.0f; + style.worldSpaceLineWidth = 15.0f; + style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.5f); + + CPolyline polyline; + { + std::vector linePoints; + linePoints.push_back(topLeft); + linePoints.push_back(topLeft + float64_t2(worldSpaceExtents.x, 0.0)); + linePoints.push_back(topLeft + float64_t2(worldSpaceExtents.x, -worldSpaceExtents.y)); + linePoints.push_back(topLeft + float64_t2(0.0, -worldSpaceExtents.y)); + linePoints.push_back(topLeft); + polyline.addLinePoints(linePoints); + } + + drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + } +#endif + } } double getScreenToWorldRatio(const float64_t3x3& viewProjectionMatrix, uint32_t2 windowSize) @@ -3243,10 +3709,12 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } protected: - std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); clock_t::time_point start; + std::vector> replayCaches = {}; // vector because there can be overflow submits + bool finishedCachingDraw = false; + bool fragmentShaderInterlockEnabled = false; core::smart_refctd_ptr m_inputSystem; @@ -3264,7 +3732,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo; // pointer can change, value cannot - smart_refctd_ptr msdfTextureSampler; + smart_refctd_ptr msdfImageSampler; + smart_refctd_ptr staticImageSampler; smart_refctd_ptr m_globalsBuffer; smart_refctd_ptr descriptorSet0; @@ -3300,6 +3769,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::vector> m_shapeMSDFImages = {}; + std::vector> sampleImages; + smart_refctd_ptr gridDTMHeightMap; + static constexpr char 
FirstGeneratedCharacter = ' '; static constexpr char LastGeneratedCharacter = '~'; @@ -3314,3 +3786,4 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }; NBL_MAIN_FUNC(ComputerAidedDesign) + diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl index 82a646319..691cd3d3b 100644 --- a/62_CAD/shaders/geotexture/common.hlsl +++ b/62_CAD/shaders/geotexture/common.hlsl @@ -25,7 +25,7 @@ struct PSInput [[vk::push_constant]] GeoTextureOBB geoTextureOBB; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); +// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] Texture2D geoTexture : register(t0); diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 392e796f4..255c46d8a 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -1,7 +1,14 @@ #ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ #define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -#define NBL_FORCE_EMULATED_FLOAT_64 +#ifdef __HLSL_VERSION +#ifndef NBL_USE_SPIRV_BUILTINS +#include "runtimeDeviceConfigCaps.hlsl" // defines DeviceConfigCaps, uses JIT device caps +#endif +#endif + +// TODO[Erfan]: Turn off in the future, but keep enabled to test +// #define NBL_FORCE_EMULATED_FLOAT_64 #include #include @@ -13,17 +20,14 @@ #ifdef __HLSL_VERSION #include -#include #endif using namespace nbl::hlsl; - -// because we can't use jit/device_capabilities.hlsl in c++ code #ifdef __HLSL_VERSION -using pfloat64_t = portable_float64_t; -using pfloat64_t2 = portable_float64_t2; -using pfloat64_t3 = portable_float64_t3; +using pfloat64_t = portable_float64_t; +using pfloat64_t2 = portable_float64_t2; +using pfloat64_t3 = portable_float64_t3; 
#else using pfloat64_t = float64_t; using pfloat64_t2 = nbl::hlsl::vector; @@ -32,40 +36,58 @@ using pfloat64_t3 = nbl::hlsl::vector; using pfloat64_t3x3 = portable_matrix_t3x3; -// TODO: Compute this in a compute shader from the world counterparts -// because this struct includes NDC coordinates, the values will change based camera zoom and move -// of course we could have the clip values to be in world units and also the matrix to transform to world instead of ndc but that requires extra computations(matrix multiplications) per vertex -struct ClipProjectionData +struct PushConstants +{ + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; + uint32_t isDTMRendering; +}; + +#ifdef __HLSL_VERSION +NBL_CONSTEXPR float InvalidGridDTMHeightValue = asfloat(0x7FC00000); + +bool isInvalidGridDtmHeightValue(float value) +{ + return isnan(value); +} + +#endif + +struct WorldClipRect { - pfloat64_t3x3 projectionToNDC; // 72 -> because we use scalar_layout - float32_t2 minClipNDC; // 80 - float32_t2 maxClipNDC; // 88 + pfloat64_t2 minClip; // min clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) + pfloat64_t2 maxClip; // max clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) }; +struct Pointers +{ + uint64_t lineStyles; + uint64_t dtmSettings; + uint64_t customProjections; + uint64_t customClipRects; + uint64_t mainObjects; + uint64_t drawObjects; + uint64_t geometryBuffer; +}; #ifndef __HLSL_VERSION -static_assert(offsetof(ClipProjectionData, projectionToNDC) == 0u); -static_assert(offsetof(ClipProjectionData, minClipNDC) == 72u); -static_assert(offsetof(ClipProjectionData, maxClipNDC) == 80u); +static_assert(sizeof(Pointers) == 56u); #endif struct Globals { - ClipProjectionData defaultClipProjection; // 88 - pfloat64_t screenToWorldRatio; // 96 - pfloat64_t worldToScreenRatio; // 100 - uint32_t2 resolution; // 108 - float antiAliasingFactor; // 112 - 
float miterLimit; // 116 - float32_t2 _padding; // 128 + Pointers pointers; + pfloat64_t3x3 defaultProjectionToNDC; + pfloat64_t3x3 screenToWorldScaleTransform; // Pre-multiply your transform with this to scale in screen space (e.g., scale 100.0 means 100 screen pixels). + float screenToWorldRatio; + float worldToScreenRatio; + uint32_t2 resolution; + float antiAliasingFactor; + uint32_t miterLimit; + uint32_t currentlyActiveMainObjectIndex; // for alpha resolve to skip resolving activeMainObjectIdx and prep it for next submit + float32_t _padding; }; - #ifndef __HLSL_VERSION -static_assert(offsetof(Globals, defaultClipProjection) == 0u); -static_assert(offsetof(Globals, screenToWorldRatio) == 88u); -static_assert(offsetof(Globals, worldToScreenRatio) == 96u); -static_assert(offsetof(Globals, resolution) == 104u); -static_assert(offsetof(Globals, antiAliasingFactor) == 112u); -static_assert(offsetof(Globals, miterLimit) == 116u); +static_assert(sizeof(Globals) == 232u); #endif #ifdef __HLSL_VERSION @@ -100,6 +122,18 @@ pfloat64_t2 transformVectorNdc(NBL_CONST_REF_ARG(pfloat64_t3x3) transformation, } #endif +enum class MainObjectType : uint32_t +{ + NONE = 0u, + POLYLINE, + HATCH, + TEXT, + STATIC_IMAGE, + DTM, + GRID_DTM, + STREAMED_IMAGE, +}; + enum class ObjectType : uint32_t { LINE = 0u, @@ -107,7 +141,10 @@ enum class ObjectType : uint32_t CURVE_BOX = 2u, POLYLINE_CONNECTOR = 3u, FONT_GLYPH = 4u, - IMAGE = 5u + STATIC_IMAGE = 5u, + TRIANGLE_MESH = 6u, + GRID_DTM = 7u, + STREAMED_IMAGE = 8u, }; enum class MajorAxis : uint32_t @@ -116,12 +153,23 @@ enum class MajorAxis : uint32_t MAJOR_Y = 1u, }; +enum TransformationType +{ + TT_NORMAL = 0, + TT_FIXED_SCREENSPACE_SIZE +}; + + // Consists of multiple DrawObjects +// [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound +// [TODO]: pack indices and members of mainObject 
and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work struct MainObject { uint32_t styleIdx; - uint32_t pad; // do I even need this on the gpu side? it's stored in structured buffer not bda - uint64_t clipProjectionAddress; + uint32_t dtmSettingsIdx; + uint32_t customProjectionIndex; + uint32_t customClipRectIndex; + uint32_t transformationType; // todo pack later, it's just 2 possible values atm }; struct DrawObject @@ -131,6 +179,7 @@ struct DrawObject uint64_t geometryAddress; }; +// Goes into geometry buffer, needs to be aligned by 8 struct LinePointInfo { pfloat64_t2 p; @@ -138,6 +187,7 @@ struct LinePointInfo float32_t stretchValue; }; +// Goes into geometry buffer, needs to be aligned by 8 struct QuadraticBezierInfo { nbl::hlsl::shapes::QuadraticBezier shape; // 48bytes = 3 (control points) x 16 (float64_t2) @@ -148,6 +198,7 @@ struct QuadraticBezierInfo static_assert(offsetof(QuadraticBezierInfo, phaseShift) == 48u); #endif +// Goes into geometry buffer, needs to be aligned by 8 struct GlyphInfo { pfloat64_t2 topLeft; // 2 * 8 = 16 bytes @@ -192,14 +243,68 @@ struct GlyphInfo } }; +// Goes into geometry buffer, needs to be aligned by 8 struct ImageObjectInfo { - pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + float32_t2 dirU; // 2 * 4 = 8 bytes (24) + float32_t aspectRatio; // 4 bytes (28) + uint32_t textureID; // 4 bytes (32) +}; + +// Goes into geometry buffer, needs to be aligned by 8 +// Currently a simple OBB like ImageObject, but later will be fullscreen with additional info about UV offset for toroidal(mirror) addressing +struct GeoreferencedImageInfo +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) float32_t2 dirU; // 2 * 4 = 8 bytes (24) float32_t aspectRatio; // 4 bytes (28) uint32_t textureID; // 4 bytes (32) }; +// Goes into geometry buffer, needs to be aligned by 8 +struct GridDTMInfo +{ + pfloat64_t2 
topLeft; // 2 * 8 = 16 bytes (16) + pfloat64_t2 worldSpaceExtents; // 16 bytes (32) + uint32_t textureID; // 4 bytes (36) + float gridCellWidth; // 4 bytes (40) + float thicknessOfTheThickestLine; // 4 bytes (44) + float _padding; // 4 bytes (48) +}; + +enum E_CELL_DIAGONAL : uint32_t +{ + TOP_LEFT_TO_BOTTOM_RIGHT = 0u, + BOTTOM_LEFT_TO_TOP_RIGHT = 1u, + INVALID = 2u +}; + +#ifndef __HLSL_VERSION + +// sets last bit of data to 1 or 0 depending on diagonalMode +static void setDiagonalModeBit(float* data, E_CELL_DIAGONAL diagonalMode) +{ + if (diagonalMode == E_CELL_DIAGONAL::INVALID) + return; + + uint32_t dataAsUint = reinterpret_cast(*data); + constexpr uint32_t HEIGHT_VALUE_MASK = 0xFFFFFFFEu; + dataAsUint &= HEIGHT_VALUE_MASK; + dataAsUint |= static_cast(diagonalMode); + *data = reinterpret_cast(dataAsUint); + + uint32_t dataAsUintDbg = reinterpret_cast(*data); +} + +#endif + +// Top left corner holds diagonal mode info of a cell +static E_CELL_DIAGONAL getDiagonalModeFromCellCornerData(uint32_t cellCornerData) +{ + return (cellCornerData & 0x1u) ? 
BOTTOM_LEFT_TO_TOP_RIGHT : TOP_LEFT_TO_BOTTOM_RIGHT; +} + static uint32_t packR11G11B10_UNORM(float32_t3 color) { // Scale and convert to integers @@ -241,6 +346,7 @@ struct PolylineConnector }; // NOTE: Don't attempt to pack curveMin/Max to uints because of limited range of values, we need the logarithmic precision of floats (more precision near 0) +// Goes into geometry buffer, needs to be aligned by 8 struct CurveBox { // will get transformed in the vertex shader, and will be calculated on the cpu when generating these boxes @@ -262,9 +368,15 @@ NBL_CONSTEXPR uint32_t InvalidRigidSegmentIndex = 0xffffffff; NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits::infinity; -// TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like LineStyle stipple patterns) and requires upper_bound to figure out the color based on height value. +// TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like stipple patterns) and requires upper_bound to figure out the color based on height value. // We'll discuss that later or what it will be looking like and how it's gonna get passed to our shaders. 
+struct TriangleMeshVertex +{ + pfloat64_t2 pos; + pfloat64_t height; // TODO: can be of type float32_t instead +}; + // The color parameter is also used for styling non-curve objects such as text glyphs and hatches with solid color struct LineStyle { @@ -316,6 +428,73 @@ struct LineStyle } }; +enum E_DTM_MODE +{ + OUTLINE = 1 << 0, + CONTOUR = 1 << 1, + HEIGHT_SHADING = 1 << 2, +}; + +enum class E_HEIGHT_SHADING_MODE : uint32_t +{ + DISCRETE_VARIABLE_LENGTH_INTERVALS, + DISCRETE_FIXED_LENGTH_INTERVALS, + CONTINOUS_INTERVALS +}; + +struct DTMContourSettings +{ + uint32_t contourLineStyleIdx; // index into line styles + float contourLinesStartHeight; + float contourLinesEndHeight; + float contourLinesHeightInterval; +}; + +struct DTMHeightShadingSettings +{ + const static uint32_t HeightColorMapMaxEntries = 16u; + + // height-color map + float intervalLength; + float intervalIndexToHeightMultiplier; + int isCenteredShading; + + uint32_t heightColorEntryCount; + float heightColorMapHeights[HeightColorMapMaxEntries]; + float32_t4 heightColorMapColors[HeightColorMapMaxEntries]; + + E_HEIGHT_SHADING_MODE determineHeightShadingMode() + { + if (nbl::hlsl::isinf(intervalLength)) + return E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + if (intervalLength == 0.0f) + return E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } +}; + +// Documentation and explanation of variables in DTMSettingsInfo +struct DTMSettings +{ + const static uint32_t MaxContourSettings = 8u; + + uint32_t mode; // E_DTM_MODE + + // outline + uint32_t outlineLineStyleIdx; + + // contour lines + uint32_t contourSettingsCount; + DTMContourSettings contourSettings[MaxContourSettings]; + + // height shading + DTMHeightShadingSettings heightShadingSettings; + + bool drawOutlineEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::OUTLINE) != 0u; } + bool drawContourEnabled() NBL_CONST_MEMBER_FUNC { return (mode & 
E_DTM_MODE::CONTOUR) != 0u; } + bool drawHeightShadingEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::HEIGHT_SHADING) != 0u; } +}; + #ifndef __HLSL_VERSION inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) { @@ -338,22 +517,106 @@ inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) return isStipplePatternArrayEqual; } + +inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) +{ + if (lhs.mode != rhs.mode) + return false; + + if (lhs.drawOutlineEnabled()) + { + if (lhs.outlineLineStyleIdx != rhs.outlineLineStyleIdx) + return false; + } + + if (lhs.drawContourEnabled()) + { + if (lhs.contourSettingsCount != rhs.contourSettingsCount) + return false; + if (memcmp(lhs.contourSettings, rhs.contourSettings, lhs.contourSettingsCount * sizeof(DTMContourSettings))) + return false; + } + + if (lhs.drawHeightShadingEnabled()) + { + if (lhs.heightShadingSettings.intervalLength != rhs.heightShadingSettings.intervalLength) + return false; + if (lhs.heightShadingSettings.intervalIndexToHeightMultiplier != rhs.heightShadingSettings.intervalIndexToHeightMultiplier) + return false; + if (lhs.heightShadingSettings.isCenteredShading != rhs.heightShadingSettings.isCenteredShading) + return false; + if (lhs.heightShadingSettings.heightColorEntryCount != rhs.heightShadingSettings.heightColorEntryCount) + return false; + + + if(memcmp(lhs.heightShadingSettings.heightColorMapHeights, rhs.heightShadingSettings.heightColorMapHeights, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float))) + return false; + if(memcmp(lhs.heightShadingSettings.heightColorMapColors, rhs.heightShadingSettings.heightColorMapColors, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float32_t4))) + return false; + } + + return true; +} #endif +NBL_CONSTEXPR uint32_t ImagesBindingArraySize = 128; NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture NBL_CONSTEXPR uint32_t AlphaBits = 32u - 
MainObjectIdxBits; NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; -NBL_CONSTEXPR uint64_t InvalidClipProjectionAddress = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; + +// Hatches NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; -// TODO: get automatic version working on HLSL NBL_CONSTEXPR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); + +// Text or MSDF Hatches NBL_CONSTEXPR float MSDFPixelRange = 4.0f; NBL_CONSTEXPR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; NBL_CONSTEXPR float MSDFSize = 32.0f; NBL_CONSTEXPR uint32_t MSDFMips = 4; NBL_CONSTEXPR float HatchFillMSDFSceenSpaceSize = 8.0; +// Used in CPU-side only for now +struct OrientedBoundingBox2D +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + float32_t2 dirU; // 2 * 4 = 8 bytes (24) + float32_t aspectRatio; // 4 bytes (28) +}; + +#ifdef __HLSL_VERSION +[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); + +LineStyle loadLineStyle(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.lineStyles + index * sizeof(LineStyle), 8u); +} +DTMSettings loadDTMSettings(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.dtmSettings + index * sizeof(DTMSettings), 8u); +} +pfloat64_t3x3 loadCustomProjection(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.customProjections + index * sizeof(pfloat64_t3x3), 8u); +} 
+WorldClipRect loadCustomClipRect(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.customClipRects + index * sizeof(WorldClipRect), 8u); +} +MainObject loadMainObject(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.mainObjects + index * sizeof(MainObject), 4u); +} +DrawObject loadDrawObject(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.drawObjects + index * sizeof(DrawObject), 8u); +} +#endif + + #endif diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 17c851a19..f378c44db 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -3,6 +3,15 @@ #include "../globals.hlsl" +// This function soley exists to match n4ce's behaviour, colors and color operations for DTMs, Curves, Lines, Hatches are done in linear space and then outputted to linear surface (as if surface had UNORM format, but ours is SRGB) +// We should do gamma "uncorrection" to account for the fact that our surface format is SRGB and will do gamma correction +void gammaUncorrect(inout float3 col) +{ + bool outputToSRGB = true; // TODO + float gamma = (outputToSRGB) ? 
2.2f : 1.0f; + col.rgb = pow(col.rgb, gamma); +} + // TODO: Use these in C++ as well once numeric_limits compiles on C++ float32_t2 unpackCurveBoxUnorm(uint32_t2 value) { @@ -73,7 +82,12 @@ struct PSInput [[vk::location(2)]] nointerpolation float4 data3 : COLOR3; [[vk::location(3)]] nointerpolation float4 data4 : COLOR4; // Data segments that need interpolation, mostly for hatches - [[vk::location(5)]] float2 interp_data5 : COLOR5; + [[vk::location(5)]] float4 interp_data5 : COLOR5; +#ifdef FRAGMENT_SHADER_INPUT + [[vk::location(6)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] float3 vertexScreenSpacePos[3] : COLOR6; +#else + [[vk::location(6)]] float3 vertexScreenSpacePos : COLOR6; +#endif // ArcLenCalculator // Set functions used in vshader, get functions used in fshader @@ -98,7 +112,7 @@ struct PSInput void setCurrentWorldToScreenRatio(float worldToScreen) { interp_data5.y = worldToScreen; } float getCurrentWorldToScreenRatio() { return interp_data5.y; } - + /* LINE */ float2 getLineStart() { return data2.xy; } float2 getLineEnd() { return data2.zw; } @@ -208,19 +222,41 @@ struct PSInput void setImageUV(float2 uv) { interp_data5.xy = uv; } void setImageTextureId(uint32_t textureId) { data2.x = asfloat(textureId); } + + /* TRIANGLE MESH */ + +#ifndef FRAGMENT_SHADER_INPUT // vertex shader + void setScreenSpaceVertexAttribs(float3 pos) { vertexScreenSpacePos = pos; } +#else // fragment shader + float3 getScreenSpaceVertexAttribs(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; } +#endif + + /* GRID DTM */ + uint getGridDTMHeightTextureID() { return data1.z; } + float2 getGridDTMScreenSpaceTopLeft() { return data2.xy; } + float2 getGridDTMScreenSpaceGridExtents() { return data2.zw; } + float getGridDTMScreenSpaceCellWidth() { return data3.x; } + float2 getGridDTMScreenSpacePosition() { return interp_data5.zw; } + + void setGridDTMHeightTextureID(uint textureID) { data1.z = textureID; } + void setGridDTMScreenSpaceTopLeft(float2 
screenSpaceTopLeft) { data2.xy = screenSpaceTopLeft; } + void setGridDTMScreenSpaceGridExtents(float2 screenSpaceGridExtends) { data2.zw = screenSpaceGridExtends; } + void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data3.x = screenSpaceGridWidth; } + void setGridDTMScreenSpacePosition(float2 screenSpacePosition) { interp_data5.zw = screenSpacePosition; } }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); -[[vk::binding(1, 0)]] StructuredBuffer drawObjects : register(t0); -[[vk::binding(2, 0)]] StructuredBuffer mainObjects : register(t1); -[[vk::binding(3, 0)]] StructuredBuffer lineStyles : register(t2); -[[vk::combinedImageSampler]][[vk::binding(4, 0)]] Texture2DArray msdfTextures : register(t3); -[[vk::combinedImageSampler]][[vk::binding(4, 0)]] SamplerState msdfSampler : register(s3); +// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl + +[[vk::push_constant]] PushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] Texture2DArray msdfTextures : register(t4); +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] SamplerState msdfSampler : register(s4); -[[vk::binding(5, 0)]] SamplerState textureSampler : register(s4); -[[vk::binding(6, 0)]] Texture2D textures[128] : register(t4); +[[vk::binding(2, 0)]] SamplerState textureSampler : register(s5); +[[vk::binding(3, 0)]] Texture2D textures[ImagesBindingArraySize] : register(t5); +[[vk::binding(3, 0)]] Texture2D texturesU32[ImagesBindingArraySize] : register(t5); // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] globallycoherent RWTexture2D pseudoStencil : register(u0); diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl new file mode 100644 index 000000000..e90f685ba --- /dev/null +++ 
b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -0,0 +1,545 @@ +#ifndef _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ + +#include "line_style.hlsl" + +namespace dtm +{ + +// for usage in upper_bound function +struct DTMSettingsHeightsAccessor +{ + DTMHeightShadingSettings settings; + using value_type = float; + + float operator[](const uint32_t ix) + { + return settings.heightColorMapHeights[ix]; + } +}; + +float dot2(in float2 vec) +{ + return dot(vec, vec); +} + +struct HeightSegmentTransitionData +{ + float currentHeight; + float4 currentSegmentColor; + float boundaryHeight; + float4 otherSegmentColor; +}; + +// This function interpolates between the current and nearest segment colors based on the +// screen-space distance to the segment boundary. The result is a smoothly blended color +// useful for visualizing discrete height levels without harsh edges. +float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv) +{ + float pxDistanceToNearestSegment = abs((transitionInfo.currentHeight - transitionInfo.boundaryHeight) / heightDeriv); + float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); + float4 localHeightColor = lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, nearestSegmentColorCoverage); + return localHeightColor; +} + +// Computes the continuous position of a height value within uniform intervals. +// flooring this value will give the interval index +// +// If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the +// first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. +// Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans +// [minHeight, minHeight + intervalLength]. +// +// Parameters: +// - height: The height value to classify. 
+// - minHeight: The reference starting height for interval calculation. +// - intervalLength: The length of each interval segment. +// - isCenteredShading: Whether to center the shading intervals around minHeight. +// +// Returns: +// - A float representing the continuous position within the interval grid. +float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading) +{ + if (isCenteredShading) + return ((height - minHeight) / intervalLength + 0.5f); + else + return ((height - minHeight) / intervalLength); +} + +void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight) +{ + float minShadingHeight = settings.heightColorMapHeights[0]; + float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier; + + if (settings.isCenteredShading) + outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * settings.intervalLength; + else + outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength; + + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u); + uint32_t lowerBoundHeightIndex = (upperBoundHeightIndex > 0u) ? (upperBoundHeightIndex - 1u) : 0u; // unsigned index: `max(idx - 1, 0)` wraps to 0xFFFFFFFF when idx == 0 + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + if (upperBoundHeight == lowerBoundHeight) + { + outIntervalColor = upperBoundColor; + } + else + { + float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + outIntervalColor = 
lerp(lowerBoundColor, upperBoundColor, interpolationVal); + } +} + +float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) +{ + float denom = (v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y); + float u = ((v2.y - v3.y) * (p.x - v3.x) + (v3.x - v2.x) * (p.y - v3.y)) / denom; + float v = ((v3.y - v1.y) * (p.x - v3.x) + (v1.x - v3.x) * (p.y - v3.y)) / denom; + float w = 1.0 - u - v; + return float3(u, v, w); +} + +float4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) +{ + float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + + // HEIGHT SHADING + const uint32_t heightMapSize = settings.heightColorEntryCount; + float minShadingHeight = settings.heightColorMapHeights[0]; + float maxShadingHeight = settings.heightColorMapHeights[heightMapSize - 1]; + + if (heightMapSize > 0) + { + // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez + float2 e0 = (v[1] - v[0]).xy; + float2 e1 = (v[2] - v[1]).xy; + float2 e2 = (v[0] - v[2]).xy; + + float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); + float2 v0 = fragPos - v[0].xy; + float2 v1 = fragPos - v[1].xy; + float2 v2 = fragPos - v[2].xy; + + float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0))); + float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); + float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, e2) / dot(e2, e2))); + + float line0Sdf = distanceToLine0 * triangleAreaSign * sign(v0.x * e0.y - v0.y * e0.x); + float line1Sdf = distanceToLine1 * triangleAreaSign * sign(v1.x * e1.y - v1.y * e1.x); + float line2Sdf = distanceToLine2 * triangleAreaSign * sign(v2.x * e2.y - v2.y * e2.x); + float line3Sdf = (minShadingHeight - height) / heightDeriv; + float line4Sdf = (height - maxShadingHeight) / heightDeriv; + + float convexPolygonSdf = max(line0Sdf, line1Sdf); + convexPolygonSdf = max(convexPolygonSdf, line2Sdf); + 
convexPolygonSdf = max(convexPolygonSdf, line3Sdf); + convexPolygonSdf = max(convexPolygonSdf, line4Sdf); + + outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor + globals.antiAliasingFactor, convexPolygonSdf); + + // calculate height color + E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode(); + if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + int mapIndex = max(upperBoundIndex - 1, 0); + int mapIndexPrev = max(mapIndex - 1, 0); + int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); + + // logic explainer: if colorIdx is 0.0 then it means blend with next + // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true + // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values + bool blendWithPrev = (mapIndex > 0) + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex])); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex]; + transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext]; + transitionInfo.otherSegmentColor = blendWithPrev ? 
settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext]; + + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; + } + else if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + { + float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading); + float positionWithinInterval = frac(intervalPosition); + int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); + + float4 currentIntervalColor; + float currentIntervalHeight; + getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight); + + bool blendWithPrev = (positionWithinInterval < 0.5f); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = currentIntervalColor; + if (blendWithPrev) + { + int prevIntervalIdx = max(intervalIndex - 1, 0); + float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev + getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight); + transitionInfo.boundaryHeight = currentIntervalHeight; + } + else + { + int nextIntervalIdx = intervalIndex + 1; + getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); + } + + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; + } + else if (mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height), heightMapSize - 1u); // upper_bound returns heightMapSize when height >= last entry; clamp to keep the lookups in range + uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + float interpolationVal; + if (upperBoundHeightIndex == 0) + interpolationVal = 1.0f; + else + interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + + float4 localHeightColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + + outputColor.a *= localHeightColor.a; + outputColor.rgb = localHeightColor.rgb * outputColor.a + outputColor.rgb * (1.0f - outputColor.a); + } + } + + return outputColor; +} + +float4 calculateDTMContourColor(in DTMContourSettings contourSettings, in float3 v[3], in float2 fragPos, in float height) +{ + float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + + LineStyle contourStyle = loadLineStyle(contourSettings.contourLineStyleIdx); + const float contourThickness = (contourStyle.screenSpaceLineWidth + contourStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; + float stretch = 1.0f; + float phaseShift = 0.0f; + + // TODO: move to ubo or push constants + const float startHeight = contourSettings.contourLinesStartHeight; + const float endHeight = contourSettings.contourLinesEndHeight; + const float interval = contourSettings.contourLinesHeightInterval; + + // TODO: can be precomputed + const int maxContourLineIdx = (endHeight - startHeight) / interval; + + // TODO: it actually can output a negative number, fix + int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight) / interval + 0.5f); + contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); + float contourLineHeight = startHeight + interval * contourLineIdx; + + int contourLinePointsIdx = 0; + float2 contourLinePoints[2]; + // TODO: case 
where heights we are looking for are on all three vertices + for (int i = 0; i < 3; ++i) + { + if (contourLinePointsIdx == 2) + break; + + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + if (p1.z < p0.z) + nbl::hlsl::swap(p0, p1); + + float minHeight = p0.z; + float maxHeight = p1.z; + + if (height >= minHeight && height <= maxHeight) + { + float2 edge = float2(p1.x, p1.y) - float2(p0.x, p0.y); + float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight); + + contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y); + ++contourLinePointsIdx; + } + } + + if (contourLinePointsIdx == 2) + { + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); + + float distance = nbl::hlsl::numeric_limits::max; + if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag); + } + else + { + // TODO: + // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! 
+ // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag, clipper); + } + + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, distance); + outputColor.a *= contourStyle.color.a; + outputColor.rgb = contourStyle.color.rgb; + + return outputColor; + } + + return float4(0.0f, 0.0f, 0.0f, 0.0f); +} + +float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in float2 fragPos) +{ + float4 outputColor; + + LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); + const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; + const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); + const float stretch = 1.0f; + + // index of vertex opposing an edge, needed for calculation of triangle heights + uint opposingVertexIdx[3]; + opposingVertexIdx[0] = 2; + opposingVertexIdx[1] = 0; + opposingVertexIdx[2] = 1; + + float minDistance = nbl::hlsl::numeric_limits::max; + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + for (int i = 0; i < 3; ++i) + { + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + distance = ClippedSignedDistance >::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); + + minDistance = min(minDistance, distance); + } + } + else + { + for (int i = 0; i < 3; 
++i) + { + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + // long story short, in order for stipple patterns to be consistent: + // - point with lesser x coord should be starting point + // - if x coord of both points are equal then point with lesser y value should be starting point + if (p1.x < p0.x) + nbl::hlsl::swap(p0, p1); + else if (p1.x == p0.x && p1.y < p0.y) + nbl::hlsl::swap(p0, p1); + + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + } + + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); + outputColor.a *= outlineStyle.color.a; + outputColor.rgb = outlineStyle.color.rgb; + + return outputColor; +} + +float4 calculateGridDTMOutlineColor(in uint outlineLineStyleIdx, in nbl::hlsl::shapes::Line outlineLineSegments[2], in float2 fragPos, in float phaseShift) +{ + LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); + const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; + const float stretch = 1.0f; + + // find distance to outline + float minDistance = nbl::hlsl::numeric_limits::max; + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + for (int i = 0; i < 2; ++i) + { + float distance = nbl::hlsl::numeric_limits::max; + distance = ClippedSignedDistance >::sdf(outlineLineSegments[i], fragPos, 
outlineThickness, outlineStyle.isRoadStyleFlag); + + minDistance = min(minDistance, distance); + } + } + else + { + for (int i = 0; i < 2; ++i) + { + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(outlineLineSegments[i]); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, outlineLineSegments[i], arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(outlineLineSegments[i], fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + } + + float4 outputColor; + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); + outputColor.a *= outlineStyle.color.a; + outputColor.rgb = outlineStyle.color.rgb; + + return outputColor; +} + +float4 blendUnder(in float4 dstColor, in float4 srcColor) +{ + dstColor.rgb = dstColor.rgb + (1 - dstColor.a) * srcColor.a * srcColor.rgb; + dstColor.a = (1.0f - srcColor.a) * dstColor.a + srcColor.a; + + return dstColor; +} + +E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in uint32_t4 cellData) +{ + float4 cellHeights = asfloat(cellData); + + const bool4 invalidHeights = bool4( + isInvalidGridDtmHeightValue(cellHeights.x), + isInvalidGridDtmHeightValue(cellHeights.y), + isInvalidGridDtmHeightValue(cellHeights.z), + isInvalidGridDtmHeightValue(cellHeights.w) + ); + + int invalidHeightsCount = 0; + for (int i = 0; i < 4; ++i) + invalidHeightsCount += int(invalidHeights[i]); + + if (invalidHeightsCount == 0) + { + // no invalid corner: use the diagonal mode stored in cellData.w + return getDiagonalModeFromCellCornerData(cellData.w); + } + + if (invalidHeightsCount > 1) + return INVALID; + + if (invalidHeights.x || invalidHeights.z) + return TOP_LEFT_TO_BOTTOM_RIGHT; + else if (invalidHeights.y || invalidHeights.w) + return 
BOTTOM_LEFT_TO_TOP_RIGHT; + + return INVALID; +} + +struct GridDTMTriangle +{ + float3 vertices[3]; +}; + +/** +* grid consists of square cells and cells are divided into two triangles: +* depending on mode it is +* either: or: +* v2a-------v1 v0-------v2b +* | A / | | \ B | +* | / | | \ | +* | / B | | A \ | +* v0-------v2b v2a-------v1 +*/ +struct GridDTMCell +{ + GridDTMTriangle triangleA; + GridDTMTriangle triangleB; +}; + +struct GridDTMHeightMapData +{ + // heights.x - bottom left texel + // heights.y - bottom right texel + // heights.z - top right texel + // heights.w - top left texel + float4 heights; + E_CELL_DIAGONAL cellDiagonal; +}; + +GridDTMHeightMapData retrieveGridDTMCellDataFromHeightMap(in float2 gridExtents, in float2 cellCoords, const float cellWidth, in Texture2D heightMap) +{ + GridDTMHeightMapData output; + + const float2 maxCellCoords = float2(round(gridExtents.x / cellWidth), round(gridExtents.y / cellWidth)); + const float2 location = (cellCoords + float2(0.5f, 0.5f)) / maxCellCoords; + uint32_t4 cellData = heightMap.Gather(textureSampler, float2(location.x, location.y), 0); + + // cellData holds the raw bits of the cell's four corner texels; component order is documented on GridDTMHeightMapData::heights + + output.heights = asfloat(cellData); + output.cellDiagonal = dtm::resolveGridDTMCellDiagonal(cellData); + return output; +} + +GridDTMCell calculateCellTriangles(in float2 topLeft, in float2 gridExtents, in float2 cellCoords, const float cellWidth, in Texture2D heightMap) +{ + GridDTMCell output; + + // heightData.heights.x - bottom left texel + // heightData.heights.y - bottom right texel + // heightData.heights.z - top right texel + // heightData.heights.w - top left texel + dtm::GridDTMHeightMapData heightData = dtm::retrieveGridDTMCellDataFromHeightMap(gridExtents, cellCoords, cellWidth, heightMap); + const bool diagonalFromTopLeftToBottomRight = heightData.cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; + + if 
(diagonalFromTopLeftToBottomRight) + { + output.triangleA.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + output.triangleA.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + output.triangleA.vertices[2] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + + output.triangleB.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + output.triangleB.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + output.triangleB.vertices[2] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + } + else + { + output.triangleA.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + output.triangleA.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + output.triangleA.vertices[2] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + + output.triangleB.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + output.triangleB.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + output.triangleB.vertices[2] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + } + + // move from grid space to screen space + [unroll] + for (int i = 0; i < 3; ++i) + { + output.triangleA.vertices[i].xy += topLeft; + output.triangleB.vertices[i].xy += topLeft; + } + + return output; +} + +} + +#endif \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl 
b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index e850622c3..25564a964 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -1,655 +1,611 @@ -#include "common.hlsl" -#include -#include -#include -#include -#include -#include -#include -#include - -template -struct DefaultClipper -{ - using float_t2 = vector; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; - - static DefaultClipper construct() - { - DefaultClipper ret; - return ret; - } - - inline float_t2 operator()(const float_t t) - { - const float_t ret = clamp(t, 0.0, 1.0); - return float_t2(ret, ret); - } -}; - -// for usage in upper_bound function -struct StyleAccessor -{ - LineStyle style; - using value_type = float; - - float operator[](const uint32_t ix) - { - return style.getStippleValue(ix); - } -}; - -template -struct StyleClipper -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; - - static StyleClipper construct( - LineStyle style, - CurveType curve, - typename CurveType::ArcLengthCalculator arcLenCalc, - float phaseShift, - float stretch, - float worldToScreenRatio) - { - StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; - - // values for non-uniform stretching with a rigid segment - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - // rigidSegment info in old non stretched pattern - ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; - ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? 
style.getStippleValue(style.rigidSegmentIdx) : 1.0f; - ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; - // stretch value for non rigid segments - ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); - // rigidSegment info to new stretched pattern - ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start - ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len - ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end - } - else - { - ret.nonRigidSegmentStretchValue = stretch; - } - - return ret; - } - - // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes - // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern - float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) - { - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment - ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment - ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment - ret *= stretch; - return ret; - } - else - { - return normalizedPlaceInPattern; - } - } - - float_t2 operator()(float_t t) - { - // basicaly 0.0 and 1.0 but with a guardband to discard outside the range - const float_t minT = 0.0 - 1.0; - const float_t maxT = 1.0 + 1.0; - - StyleAccessor styleAccessor = { style }; - const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; - const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * 
style.reciprocalStipplePatternLen); - - const float_t arcLen = arcLenCalc.calcArcLen(t); - const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); - float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); - uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); - - const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; - float_t2 ret = float_t2(InvalidT, InvalidT); - - // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections - const bool notInDrawSection = patternIdx & 0x1; - - // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections - float_t minDrawT = 0.0; - float_t maxDrawT = 1.0; - { - float_t normalizedPlaceInPatternBegin = frac(phaseShift); - normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); - uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); - const bool BeginInNonDrawSection = patternIdxBegin & 0x1; - - if (BeginInNonDrawSection) - { - float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; - diffToRightDrawableSection -= normalizedPlaceInPatternBegin; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? 
nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; - minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); - } - - // Completely in non-draw section -> clip away: - if (minDrawT >= 1.0) - return ret; - - const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); - const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); - float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); - uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); - const bool EndInNonDrawSection = patternIdxEnd & 0x1; - - if (EndInNonDrawSection) - { - float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; - maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); - } - } - - if (notInDrawSection) - { - float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - - float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; - const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; - float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); - t0 = clamp(t0, minDrawT, maxDrawT); - - float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; - diffToRightDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; - const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; - float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); - t1 = clamp(t1, minDrawT, maxDrawT); - - ret = float_t2(t0, t1); - } - else - { - t = clamp(t, minDrawT, maxDrawT); - ret = float_t2(t, t); - } - - return ret; - } - - LineStyle style; - CurveType curve; - typename CurveType::ArcLengthCalculator arcLenCalc; - float phaseShift; - float stretch; - float worldToScreenRatio; - // precomp value for non uniform stretching - float rigidSegmentStart; - float rigidSegmentEnd; - float rigidSegmentLen; - float nonRigidSegmentStretchValue; -}; - -template > -struct ClippedSignedDistance -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - - const static float_t sdf(CurveType curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) - { - typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); - - const float_t InvalidT = nbl::hlsl::numeric_limits::max; - // TODO: Fix and test, we're not working with squared distance anymore - const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? 
- - bool clipped = false; - float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; - float_t closestT = InvalidT; - [[unroll(CurveType::MaxCandidates)]] - for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) - { - const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); - if (candidateDistanceSquared < closestDistanceSquared) - { - float_t2 snappedTs = clipper(candidates[i]); - - if (snappedTs[0] == InvalidT) - { - continue; - } - - if (snappedTs[0] != candidates[i]) - { - // left snapped or clamped - const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); - if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[0]; - closestDistanceSquared = leftSnappedCandidateDistanceSquared; - } - - if (snappedTs[0] != snappedTs[1]) - { - // right snapped or clamped - const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); - if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[1]; - closestDistanceSquared = rightSnappedCandidateDistanceSquared; - } - } - } - else - { - // no snapping - if (candidateDistanceSquared < closestDistanceSquared) - { - clipped = false; - closestT = candidates[i]; - closestDistanceSquared = candidateDistanceSquared; - } - } - } - } - - - float_t roundedDistance = closestDistanceSquared - thickness; - if(!isRoadStyle) - { - return roundedDistance; - } - else - { - const float_t aaWidth = globals.antiAliasingFactor; - float_t rectCappedDistance = roundedDistance; - - if (clipped) - { - float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); - rectCappedDistance = capSquare(q, thickness, aaWidth); - } - - return rectCappedDistance; - } - } - - static float capSquare(float_t2 q, float_t th, float_t aaWidth) - { - float_t2 d = abs(q) - float_t2(aaWidth, th); - return length(max(d, 0.0)) + 
min(max(d.x, d.y), 0.0); - } -}; - -// sdf of Isosceles Trapezoid y-aligned by https://iquilezles.org/articles/distfunctions2d/ -float sdTrapezoid(float2 p, float r1, float r2, float he) -{ - float2 k1 = float2(r2, he); - float2 k2 = float2(r2 - r1, 2.0 * he); - - p.x = abs(p.x); - float2 ca = float2(max(0.0, p.x - ((p.y < 0.0) ? r1 : r2)), abs(p.y) - he); - float2 cb = p - k1 + k2 * clamp(dot(k1 - p, k2) / dot(k2,k2), 0.0, 1.0); - - float s = (cb.x < 0.0 && ca.y < 0.0) ? -1.0 : 1.0; - - return s * sqrt(min(dot(ca,ca), dot(cb,cb))); -} - -// line segment sdf which returns the distance vector specialized for usage in hatch box line boundaries -float2 sdLineDstVec(float2 P, float2 A, float2 B) -{ - const float2 PA = P - A; - const float2 BA = B - A; - float h = clamp(dot(PA, BA) / dot(BA, BA), 0.0, 1.0); - return PA - BA * h; -} - -float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb) -{ - float h = length(b - a) / 2.0; - float2 d = normalize(b - a); - float2x2 rot = float2x2(d.y, -d.x, d.x, d.y); - p = mul(rot, p); - p.y -= h - thickness; - return sdTrapezoid(p, ra, rb, h); -} - -typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; -typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; - -// We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm -// because there is no `if constexpr` in hlsl -// @params -// textureColor: color sampled from a texture -// useStyleColor: instead of writing and reading from colorStorage, use main object Idx to find the style color for the object. 
-template -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 textureColor, bool colorFromTexture); - -template<> -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) -{ - uint32_t styleIdx = mainObjects[currentMainObjectIdx].styleIdx; - if (!colorFromTexture) - { - float32_t4 col = lineStyles[styleIdx].color; - col.w *= localAlpha; - return float4(col); - } - else - return float4(localTextureColor, localAlpha); -} -template<> -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) -{ - float32_t4 color; - nbl::hlsl::spirv::beginInvocationInterlockEXT(); - - const uint32_t packedData = pseudoStencil[fragCoord]; - - const uint32_t localQuantizedAlpha = (uint32_t)(localAlpha * 255.f); - const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits); - const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits); - // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate - const bool differentMainObject = currentMainObjectIdx != storedMainObjectIdx; // meaning current pixel's main object is different than what is already stored - const bool resolve = differentMainObject && storedMainObjectIdx != InvalidMainObjectIdx; - uint32_t toResolveStyleIdx = InvalidStyleIdx; - - // load from colorStorage only if we want to resolve color from texture instead of style - // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store - if (resolve) - { - toResolveStyleIdx = mainObjects[storedMainObjectIdx].styleIdx; - if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to 
resolve is invalid, then it means we should resolve from color - color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); - } - - // If current localAlpha is higher than what is already stored in pseudoStencil we will update the value in pseudoStencil or the color in colorStorage, this is equivalent to programmable blending MAX operation. - // OR If previous pixel has a different ID than current's (i.e. previous either empty/invalid or a differnet mainObject), we should update our alpha and color storages. - if (differentMainObject || localQuantizedAlpha > storedQuantizedAlpha) - { - pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(localQuantizedAlpha,currentMainObjectIdx,AlphaBits,MainObjectIdxBits); - if (colorFromTexture) // writing color from texture - colorStorage[fragCoord] = packR11G11B10_UNORM(localTextureColor); - } - - nbl::hlsl::spirv::endInvocationInterlockEXT(); - - if (!resolve) - discard; - - // draw with previous geometry's style's color or stored in texture buffer :kek: - // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj - if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style - color = lineStyles[toResolveStyleIdx].color; - color.a *= float(storedQuantizedAlpha) / 255.f; - - return color; -} - -[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] -[shader("pixel")] -float4 fragMain(PSInput input) : SV_TARGET -{ - float localAlpha = 0.0f; - float3 textureColor = float3(0, 0, 0); // color sampled from a texture - - // TODO[Przemek]: Disable All the object rendering paths if you want. 
- ObjectType objType = input.getObjType(); - const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); - const MainObject mainObj = mainObjects[currentMainObjectIdx]; - - // figure out local alpha with sdf - if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) - { - float distance = nbl::hlsl::numeric_limits::max; - if (objType == ObjectType::LINE) - { - const float2 start = input.getLineStart(); - const float2 end = input.getLineEnd(); - const uint32_t styleIdx = mainObj.styleIdx; - const float thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - - LineStyle style = lineStyles[styleIdx]; - - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); - } - else - { - LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::QUAD_BEZIER) - { - nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic(); - nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator(); - - const uint32_t styleIdx = mainObj.styleIdx; - const float thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - 
const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - - LineStyle style = lineStyles[styleIdx]; - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); - } - else - { - BezierStyleClipper clipper = BezierStyleClipper::construct(lineStyles[styleIdx], quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::POLYLINE_CONNECTOR) - { - const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter(); - distance = miterSDF( - P, - input.getLineThickness(), - input.getPolylineConnectorTrapezoidStart(), - input.getPolylineConnectorTrapezoidEnd(), - input.getPolylineConnectorTrapezoidLongBase(), - input.getPolylineConnectorTrapezoidShortBase()); - - } - localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); - } - else if (objType == ObjectType::CURVE_BOX) - { - const float minorBBoxUV = input.getMinorBBoxUV(); - const float majorBBoxUV = input.getMajorBBoxUV(); - - nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor(); - nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor(); - nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor(); - nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor(); - - // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel. 
- nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - - const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0); - const float minEv = curveMinMinor.evaluate(minT); - - const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0); - const float maxEv = curveMaxMinor.evaluate(maxT); - - const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0; - const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv; - - if (insideMinor && insideMajor) - { - localAlpha = 1.0; - } - else - { - // Find the true SDF of a hatch box boundary which is bounded by two curves, It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction) - // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor - const float InvalidT = nbl::hlsl::numeric_limits::max; - const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max; - - const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize(); - - - float closestDistanceSquared = MAX_DISTANCE_SQUARED; - const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize; - - if (minorBBoxUV < minEv) - { - // DO SDF of Min Curve - nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize, - float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize, - float2(curveMinMinor.c, curveMinMajor.c) * boxScreenSpaceSize); - - nbl::hlsl::shapes::Quadratic::Candidates candidates 
= minCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) - { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = minCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; - } - } - else if (minorBBoxUV > maxEv) - { - // Do SDF of Max Curve - nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize, - float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize, - float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize); - nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) - { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = maxCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; - } - } - - if (!insideMajor) - { - const bool minLessThanMax = minEv < maxEv; - float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED); - if (majorBBoxUV > 1.0) - { - const float2 minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize); - else - majorDistVector = pos - minCurveEnd; - } - else - { - const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize); - else - majorDistVector 
= pos - minCurveStart; - } - - const float majorDistSq = dot(majorDistVector, majorDistVector); - if (majorDistSq < closestDistanceSquared) - closestDistanceSquared = majorDistSq; - } - - const float dist = sqrt(closestDistanceSquared); - localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist); - } - - LineStyle style = lineStyles[mainObj.styleIdx]; - uint32_t textureId = asuint(style.screenSpaceLineWidth); - if (textureId != InvalidTextureIdx) - { - // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected - // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); - localAlpha *= smoothstep(+globals.antiAliasingFactor / 2.0, -globals.antiAliasingFactor / 2.0f, msdf); - } - } - else if (objType == ObjectType::FONT_GLYPH) - { - const float2 uv = input.getFontGlyphUV(); - const uint32_t textureId = input.getFontGlyphTextureId(); - - if (textureId != InvalidTextureIdx) - { - float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange()); - /* - explaining "*= exp2(max(mipLevel,0.0))" - Each mip level has constant MSDFPixelRange - Which essentially makes the msdfSamples here (Harware Sampled) have different scales per mip - As we go up 1 mip level, the msdf distance should be multiplied by 2.0 - While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled. 
- It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice! - - Alternatively you can think of it as doing this instead: - localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf); - Which is reducing the aa feathering as we go up the mip levels. - to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph - */ - msdf *= exp2(max(mipLevel,0.0)); - - LineStyle style = lineStyles[mainObj.styleIdx]; - const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; - const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle - localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf); - } - } - else if (objType == ObjectType::IMAGE) - { - const float2 uv = input.getImageUV(); - const uint32_t textureId = input.getImageTextureId(); - - if (textureId != InvalidTextureIdx) - { - float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); - textureColor = colorSample.rgb; - localAlpha = colorSample.a; - } - } - - uint2 fragCoord = uint2(input.position.xy); - - if (localAlpha <= 0) - discard; - - const bool colorFromTexture = objType == ObjectType::IMAGE; - - // TODO[Przemek]: But make sure you're still calling this, correctly calculating alpha and texture color. - // you can add 1 main object and push via DrawResourcesFiller like we already do for other objects (this go in the mainObjects StorageBuffer) and then set the currentMainObjectIdx to 0 here - // having 1 main object temporarily means that all triangle meshes will be treated as a unified object in blending operations. 
- return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); -} +#define FRAGMENT_SHADER_INPUT +#include "common.hlsl" +#include "dtm.hlsl" +#include +#include +#include +#include +#include +#include +#include +//#include + +// sdf of Isosceles Trapezoid y-aligned by https://iquilezles.org/articles/distfunctions2d/ +float sdTrapezoid(float2 p, float r1, float r2, float he) +{ + float2 k1 = float2(r2, he); + float2 k2 = float2(r2 - r1, 2.0 * he); + + p.x = abs(p.x); + float2 ca = float2(max(0.0, p.x - ((p.y < 0.0) ? r1 : r2)), abs(p.y) - he); + float2 cb = p - k1 + k2 * clamp(dot(k1 - p, k2) / dot(k2,k2), 0.0, 1.0); + + float s = (cb.x < 0.0 && ca.y < 0.0) ? -1.0 : 1.0; + + return s * sqrt(min(dot(ca,ca), dot(cb,cb))); +} + +// line segment sdf which returns the distance vector specialized for usage in hatch box line boundaries +float2 sdLineDstVec(float2 P, float2 A, float2 B) +{ + const float2 PA = P - A; + const float2 BA = B - A; + float h = clamp(dot(PA, BA) / dot(BA, BA), 0.0, 1.0); + return PA - BA * h; +} + +float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb) +{ + float h = length(b - a) / 2.0; + float2 d = normalize(b - a); + float2x2 rot = float2x2(d.y, -d.x, d.x, d.y); + p = mul(rot, p); + p.y -= h - thickness; + return sdTrapezoid(p, ra, rb, h); +} + +// We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm +// because there is no `if constexpr` in hlsl +// @params +// textureColor: color sampled from a texture +// useStyleColor: instead of writing and reading from colorStorage, use main object Idx to find the style color for the object. 
+template +float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 textureColor, bool colorFromTexture); + +template<> +float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) +{ + uint32_t styleIdx = loadMainObject(currentMainObjectIdx).styleIdx; + if (!colorFromTexture) + { + float32_t4 col = loadLineStyle(styleIdx).color; + col.w *= localAlpha; + return float4(col); + } + else + return float4(localTextureColor, localAlpha); +} +template<> +float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) +{ + float32_t4 color; + nbl::hlsl::spirv::beginInvocationInterlockEXT(); + + const uint32_t packedData = pseudoStencil[fragCoord]; + + const uint32_t localQuantizedAlpha = (uint32_t)(localAlpha * 255.f); + const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits); + const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits); + // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate + const bool differentMainObject = currentMainObjectIdx != storedMainObjectIdx; // meaning current pixel's main object is different than what is already stored + const bool resolve = differentMainObject && storedMainObjectIdx != InvalidMainObjectIdx; + uint32_t toResolveStyleIdx = InvalidStyleIdx; + + // load from colorStorage only if we want to resolve color from texture instead of style + // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store + if (resolve) + { + toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx; + if (toResolveStyleIdx == InvalidStyleIdx) // if style idx 
to resolve is invalid, then it means we should resolve from color + color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); + } + + // If current localAlpha is higher than what is already stored in pseudoStencil we will update the value in pseudoStencil or the color in colorStorage, this is equivalent to programmable blending MAX operation. + // OR If previous pixel has a different ID than current's (i.e. previous either empty/invalid or a differnet mainObject), we should update our alpha and color storages. + if (differentMainObject || localQuantizedAlpha > storedQuantizedAlpha) + { + pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(localQuantizedAlpha,currentMainObjectIdx,AlphaBits,MainObjectIdxBits); + if (colorFromTexture) // writing color from texture + colorStorage[fragCoord] = packR11G11B10_UNORM(localTextureColor); + } + + nbl::hlsl::spirv::endInvocationInterlockEXT(); + + if (!resolve) + discard; + + // draw with previous geometry's style's color or stored in texture buffer :kek: + // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj + if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style + { + color = loadLineStyle(toResolveStyleIdx).color; + gammaUncorrect(color.rgb); // want to output to SRGB without gamma correction + } + + color.a *= float(storedQuantizedAlpha) / 255.f; + + return color; +} + +bool isLineValid(in nbl::hlsl::shapes::Line l) +{ + bool isAnyLineComponentNaN = any(bool4(isnan(l.P0.x), isnan(l.P0.y), isnan(l.P1.x), isnan(l.P1.y))); + if (isAnyLineComponentNaN) + return false; + return true; +} + +[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] +[shader("pixel")] +float4 fragMain(PSInput input) : SV_TARGET +{ + float localAlpha = 0.0f; + float3 textureColor = float3(0, 0, 0); // color sampled from a texture + + ObjectType 
objType = input.getObjType(); + const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); + const MainObject mainObj = loadMainObject(currentMainObjectIdx); + + if (pc.isDTMRendering) + { + DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + float3 v[3]; + v[0] = input.getScreenSpaceVertexAttribs(0); + v[1] = input.getScreenSpaceVertexAttribs(1); + v[2] = input.getScreenSpaceVertexAttribs(2); + + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(v[0].xy, v[1].xy, v[2].xy, input.position.xy); + float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; + float heightDeriv = fwidth(height); + + float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + + if (dtmSettings.drawOutlineEnabled()) // TODO: do i need 'height' paramter here? + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, input.position.xy)); + if (dtmSettings.drawContourEnabled()) + { + for(uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], v, input.position.xy, height)); + } + if (dtmSettings.drawHeightShadingEnabled()) + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); + + localAlpha = dtmColor.a; + + // dtmColor is premultiplied by alpha, so un-premultiply it exactly once; skip the divide for fully transparent fragments because 0/0 would produce NaN, textureColor then keeps its float3(0, 0, 0) initial value + if (dtmColor.a > 0.0f) + textureColor = dtmColor.rgb / dtmColor.a; + + gammaUncorrect(textureColor); // want to output to SRGB without gamma correction + return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); + } + else + { + // figure out local alpha with sdf + if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) + { + float distance = 
nbl::hlsl::numeric_limits::max; + if (objType == ObjectType::LINE) + { + const float2 start = input.getLineStart(); + const float2 end = input.getLineEnd(); + const uint32_t styleIdx = mainObj.styleIdx; + const float thickness = input.getLineThickness(); + const float phaseShift = input.getCurrentPhaseShift(); + const float stretch = input.getPatternStretch(); + + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); + + LineStyle style = loadLineStyle(styleIdx); + + if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); + } + else + { + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + } + } + else if (objType == ObjectType::QUAD_BEZIER) + { + nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic(); + nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator(); + + const uint32_t styleIdx = mainObj.styleIdx; + const float thickness = input.getLineThickness(); + const float phaseShift = input.getCurrentPhaseShift(); + const float stretch = input.getPatternStretch(); + + LineStyle style = loadLineStyle(styleIdx); + if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); + } + else + { + BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, 
globals.worldToScreenRatio ); + distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + } + } + else if (objType == ObjectType::POLYLINE_CONNECTOR) + { + const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter(); + distance = miterSDF( + P, + input.getLineThickness(), + input.getPolylineConnectorTrapezoidStart(), + input.getPolylineConnectorTrapezoidEnd(), + input.getPolylineConnectorTrapezoidLongBase(), + input.getPolylineConnectorTrapezoidShortBase()); + + } + localAlpha = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, distance); + } + else if (objType == ObjectType::CURVE_BOX) + { + const float minorBBoxUV = input.getMinorBBoxUV(); + const float majorBBoxUV = input.getMajorBBoxUV(); + + nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor(); + nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor(); + nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor(); + nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor(); + + // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel. 
+ nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); + nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); + + const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0); + const float minEv = curveMinMinor.evaluate(minT); + + const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0); + const float maxEv = curveMaxMinor.evaluate(maxT); + + const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0; + const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv; + + if (insideMinor && insideMajor) + { + localAlpha = 1.0; + } + else + { + // Find the true SDF of a hatch box boundary which is bounded by two curves, It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction) + // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor + const float InvalidT = nbl::hlsl::numeric_limits::max; + const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max; + + const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize(); + + + float closestDistanceSquared = MAX_DISTANCE_SQUARED; + const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize; + + if (minorBBoxUV < minEv) + { + // DO SDF of Min Curve + nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct( + float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize, + float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize, + float2(curveMinMinor.c, curveMinMajor.c) * boxScreenSpaceSize); + + nbl::hlsl::shapes::Quadratic::Candidates candidates 
= minCurve.getClosestCandidates(pos); + [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] + for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + { + candidates[i] = clamp(candidates[i], 0.0, 1.0); + const float2 distVector = minCurve.evaluate(candidates[i]) - pos; + const float candidateDistanceSquared = dot(distVector, distVector); + if (candidateDistanceSquared < closestDistanceSquared) + closestDistanceSquared = candidateDistanceSquared; + } + } + else if (minorBBoxUV > maxEv) + { + // Do SDF of Max Curve + nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct( + float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize, + float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize, + float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize); + nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos); + [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] + for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + { + candidates[i] = clamp(candidates[i], 0.0, 1.0); + const float2 distVector = maxCurve.evaluate(candidates[i]) - pos; + const float candidateDistanceSquared = dot(distVector, distVector); + if (candidateDistanceSquared < closestDistanceSquared) + closestDistanceSquared = candidateDistanceSquared; + } + } + + if (!insideMajor) + { + const bool minLessThanMax = minEv < maxEv; + float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED); + if (majorBBoxUV > 1.0) + { + const float2 minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize; + if (minLessThanMax) + majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize); + else + majorDistVector = pos - minCurveEnd; + } + else + { + const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize; + if (minLessThanMax) + majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize); + else + majorDistVector 
= pos - minCurveStart; + } + + const float majorDistSq = dot(majorDistVector, majorDistVector); + if (majorDistSq < closestDistanceSquared) + closestDistanceSquared = majorDistSq; + } + + const float dist = sqrt(closestDistanceSquared); + localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist); + } + + LineStyle style = loadLineStyle(mainObj.styleIdx); + uint32_t textureId = asuint(style.screenSpaceLineWidth); + if (textureId != InvalidTextureIndex) + { + // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected + // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH + float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; + float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); + localAlpha *= 1.0f - smoothstep(-globals.antiAliasingFactor / 2.0f, globals.antiAliasingFactor / 2.0f, msdf); + } + } + else if (objType == ObjectType::FONT_GLYPH) + { + const float2 uv = input.getFontGlyphUV(); + const uint32_t textureId = input.getFontGlyphTextureId(); + + if (textureId != InvalidTextureIndex) + { + float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); + float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); + float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange()); + /* + explaining "*= exp2(max(mipLevel,0.0))" + Each mip level has constant MSDFPixelRange + Which essentially makes the msdfSamples here (Harware Sampled) have different scales per mip + As we go up 1 mip level, the msdf distance should be multiplied by 2.0 + While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled. 
+ It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice! + + Alternatively you can think of it as doing this instead: + localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf); + Which is reducing the aa feathering as we go up the mip levels. + to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph + */ + msdf *= exp2(max(mipLevel,0.0)); + + LineStyle style = loadLineStyle(mainObj.styleIdx); + const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; + const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle + localAlpha = 1.0f - smoothstep(-globals.antiAliasingFactor / 2.0f + bolden, globals.antiAliasingFactor / 2.0f + bolden, msdf); + } + } + else if (objType == ObjectType::STATIC_IMAGE) + { + const float2 uv = input.getImageUV(); + const uint32_t textureId = input.getImageTextureId(); + + if (textureId != InvalidTextureIndex) + { + float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); + textureColor = colorSample.rgb; + localAlpha = colorSample.a; + } + } + else if (objType == ObjectType::GRID_DTM) + { + // NOTE: create and read from a texture as a last step, you can generate the height values procedurally from a function while you're working on the sdf stuff. + + // Query dtm settings + // use texture Gather to get 4 corners: https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-to-gather + // DONE (but needs to be fixed): A. the outlines can be stippled, use phaseshift of the line such that they started from the grid's origin worldspace coordinate + // DONE: B. 
the contours are computed for triangles, use the same function as for dtms, choose between the two triangles based on local UV coords in current cell + // DONE: Make it so we can choose which diagonal to use to construct the triangle, it's either u=v or u=1-v + // DONE: C. Height shading same as contours (split into two triangles) + + // DONE (but needs to be tested after i implement texture height maps) Heights can have invalid values (let's say NaN) if a cell corner has NaN value then no triangle (for contour and shading) and no outline should include that corner. (see DTM image in discord with gaps) + + // TODO: we need to emulate dilation and do sdf of neighbouring cells as well. because contours, outlines and shading can bleed into other cells for AA. + // [NOTE] Do dilation as last step, when everything else works fine + + DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + if (!dtmSettings.drawContourEnabled() && !dtmSettings.drawOutlineEnabled() && !dtmSettings.drawHeightShadingEnabled()) + discard; + + float2 pos = input.getGridDTMScreenSpacePosition(); + float2 uv = input.getImageUV(); + const uint32_t textureId = input.getGridDTMHeightTextureID(); + + float2 topLeft = input.getGridDTMScreenSpaceTopLeft(); + float2 gridExtents = input.getGridDTMScreenSpaceGridExtents(); + const float cellWidth = input.getGridDTMScreenSpaceCellWidth(); + + float2 gridSpacePos = uv * gridExtents; + float2 cellCoords; + { + float2 gridSpacePosDivGridCellWidth = gridSpacePos / cellWidth; + cellCoords.x = int32_t(gridSpacePosDivGridCellWidth.x); + cellCoords.y = int32_t(gridSpacePosDivGridCellWidth.y); + } + + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; + + // grid consists of square cells and cells are divided into two triangles: + // depending on mode it is + // either: or: + // v2a-------v1 v0-------v2b + // | A / | | \ B | + // | / | | \ | + // | / B | | A \ | + // v0-------v2b v2a-------v1 + // + + // calculate screen space coordinates 
of vertices of the current tiranlge within the grid + dtm::GridDTMTriangle currentTriangle; + dtm::GridDTMCell neighbouringCells[8]; + if (dtmSettings.drawContourEnabled() || dtmSettings.drawHeightShadingEnabled()) + { + if (textureId == InvalidTextureIndex) + discard; + + // heightData.heihts.x - bottom left texel + // heightData.heihts.y - bottom right texel + // heightData.heihts.z - top right texel + // heightData.heihts.w - top left texel + dtm::GridDTMHeightMapData heightData = dtm::retrieveGridDTMCellDataFromHeightMap(gridExtents, cellCoords, cellWidth, texturesU32[NonUniformResourceIndex(textureId)]); + if (heightData.cellDiagonal == E_CELL_DIAGONAL::INVALID) + discard; + + const bool diagonalFromTopLeftToBottomRight = heightData.cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; + + float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? + // my ASCII art above explains which triangle is A and which is B + const bool triangleA = diagonalFromTopLeftToBottomRight ? + insideCellCoord.x < insideCellCoord.y : + insideCellCoord.x < cellWidth - insideCellCoord.y; + + if (diagonalFromTopLeftToBottomRight) + { + currentTriangle.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + currentTriangle.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + currentTriangle.vertices[2] = triangleA ? 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + + // TODO: use cell space instead https://github.com/Devsh-Graphics-Programming/Nabla-Examples-and-Tests/pull/186#discussion_r2133699055 + //currentTriangle.vertices[0] = float3(0.0f, 0.0f, heightData.heights.w); + //currentTriangle.vertices[1] = float3(cellWidth, cellWidth, heightData.heights.y); + //currentTriangle.vertices[2] = triangleA ? float3(0.0f, cellWidth, heightData.heights.x) : float3(cellWidth, 0.0f, heightData.heights.z); + } + else + { + currentTriangle.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + currentTriangle.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + currentTriangle.vertices[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + + // TODO: use cell space instead https://github.com/Devsh-Graphics-Programming/Nabla-Examples-and-Tests/pull/186#discussion_r2133699055 + //currentTriangle.vertices[0] = float3(0.0f, 0.0f + cellWidth, heightData.heights.x); + //currentTriangle.vertices[1] = float3(0.0f + cellWidth, 0.0f, heightData.heights.z); + //currentTriangle.vertices[2] = triangleA ? 
float3(0.0f, 0.0f, heightData.heights.w) : float3(cellWidth, cellWidth, heightData.heights.y); + } + + bool isTriangleInvalid = isnan(currentTriangle.vertices[0].z) || isnan(currentTriangle.vertices[1].z) || isnan(currentTriangle.vertices[2].z); + bool isCellPartiallyInvalid = isnan(heightData.heights.x) || isnan(heightData.heights.y) || isnan(heightData.heights.z) || isnan(heightData.heights.w); + + if (isTriangleInvalid) + discard; + + // move from grid space to screen space + [unroll] + for (int i = 0; i < 3; ++i) + currentTriangle.vertices[i].xy += topLeft; + + // offsets (in cells) of the 8 cells surrounding the current one, listed row by row; the centre cell (0, 0) is intentionally absent and every other offset must appear exactly once + const float2 neighbouringCellsCellOffsets[8] = { + float2(-1.0f, -1.0f), + float2(0.0f, -1.0f), + float2(1.0f, -1.0f), + float2(-1.0f, 0.0f), + float2(1.0f, 0.0f), + float2(-1.0f, 1.0f), + float2(0.0f, 1.0f), + float2(1.0f, 1.0f) + }; + + // construct triangles of neighbouring cells + for (int i = 0; i < 8; ++i) + { + float2 neighbouringCellCoords = cellCoords + neighbouringCellsCellOffsets[i]; + neighbouringCells[i] = dtm::calculateCellTriangles(topLeft, gridExtents, neighbouringCellCoords, cellWidth, texturesU32[NonUniformResourceIndex(textureId)]); + } + } + + // find the nearest horizontal and vertical line to the fragment + nbl::hlsl::shapes::Line outlineLineSegments[2]; + { + const float halfCellWidth = cellWidth * 0.5f; + const float2 horizontalBounds = float2(topLeft.y, topLeft.y + gridExtents.y); + const float2 verticalBounds = float2(topLeft.x, topLeft.x + gridExtents.x); + float2 nearestLineRemainingCoords = int2((gridSpacePos + halfCellWidth) / cellWidth) * cellWidth + topLeft; + // shift lines outside of the grid to a bound + nearestLineRemainingCoords.x = clamp(nearestLineRemainingCoords.x, verticalBounds.x, verticalBounds.y); + nearestLineRemainingCoords.y = clamp(nearestLineRemainingCoords.y, horizontalBounds.x, horizontalBounds.y); + + // find the nearest horizontal line + outlineLineSegments[0].P0 = float32_t2(verticalBounds.x, nearestLineRemainingCoords.y); + outlineLineSegments[0].P1 
= float32_t2(verticalBounds.y, nearestLineRemainingCoords.y); + // find the nearest vertical line + outlineLineSegments[1].P0 = float32_t2(nearestLineRemainingCoords.x, horizontalBounds.x); + outlineLineSegments[1].P1 = float32_t2(nearestLineRemainingCoords.x, horizontalBounds.y); + + // test diagonal draw (to draw diagonals height or contour shading must be enabled) + //outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); + //outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); + } + + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(currentTriangle.vertices[0].xy, currentTriangle.vertices[1].xy, currentTriangle.vertices[2].xy, input.position.xy); + float height = baryCoord.x * currentTriangle.vertices[0].z + baryCoord.y * currentTriangle.vertices[1].z + baryCoord.z * currentTriangle.vertices[2].z; + float heightDeriv = fwidth(height); + + const bool outOfBoundsUV = uv.x < 0.0f || uv.y < 0.0f || uv.x > 1.0f || uv.y > 1.0f; + float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dtmSettings.drawContourEnabled() && !outOfBoundsUV) + { + for (int i = dtmSettings.contourSettingsCount-1u; i >= 0; --i) + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], currentTriangle.vertices, input.position.xy, height)); + + // blend the contours of the 8 neighbouring cells: i selects the neighbouring cell, j selects the contour setting (indexing contourSettings with i would pick the wrong setting and can run past contourSettingsCount) + for (int i = 0; i < 8; ++i) + { + for (int j = dtmSettings.contourSettingsCount - 1u; j >= 0; --j) + { + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[j], neighbouringCells[i].triangleA.vertices, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[j], neighbouringCells[i].triangleB.vertices, input.position.xy, height)); + } + } + } + if (dtmSettings.drawOutlineEnabled()) + dtmColor = 
dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, 0.0f)); + if (dtmSettings.drawHeightShadingEnabled() && !outOfBoundsUV) + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, currentTriangle.vertices, heightDeriv, input.position.xy, height)); + + textureColor = dtmColor.rgb / dtmColor.a; + localAlpha = dtmColor.a; + + // because final color is premultiplied by alpha + textureColor = dtmColor.rgb / dtmColor.a; + + // test out of bounds draw + /*if (outOfBoundsUV) + textureColor = float3(0.0f, 1.0f, 0.0f); + else + textureColor = float3(0.0f, 0.0f, 1.0f); + + localAlpha = 0.5f;*/ + } + else if (objType == ObjectType::STREAMED_IMAGE) + { + const float2 uv = input.getImageUV(); + const uint32_t textureId = input.getImageTextureId(); + + if (textureId != InvalidTextureIndex) + { + float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); + textureColor = colorSample.rgb; + localAlpha = colorSample.a; + } + } + + + if (localAlpha <= 0) + discard; + + uint2 fragCoord = uint2(input.position.xy); + const bool colorFromTexture = objType == ObjectType::STREAMED_IMAGE || objType == ObjectType::STATIC_IMAGE || objType == ObjectType::GRID_DTM; + + return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); + } +} diff --git a/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl index 7dba46dd0..2955d22fe 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl @@ -1,9 +1,6 @@ struct PSInputDebug { float4 position : SV_Position; - [[vk::location(0)]] float4 color : COLOR; - [[vk::location(1)]] nointerpolation float4 start_end : COLOR1; - [[vk::location(2)]] nointerpolation uint3 lineWidth_eccentricity_objType : COLOR2; }; 
[shader("pixel")] diff --git a/62_CAD/shaders/main_pipeline/line_style.hlsl b/62_CAD/shaders/main_pipeline/line_style.hlsl new file mode 100644 index 000000000..f50127667 --- /dev/null +++ b/62_CAD/shaders/main_pipeline/line_style.hlsl @@ -0,0 +1,297 @@ +#ifndef _CAD_EXAMPLE_LINE_STYLE_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_LINE_STYLE_HLSL_INCLUDED_ + +#include +#include + +// for usage in upper_bound function +struct StyleAccessor +{ + LineStyle style; + using value_type = float; + + float operator[](const uint32_t ix) + { + return style.getStippleValue(ix); + } +}; + +template +struct StyleClipper +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; + + static StyleClipper construct( + LineStyle style, + CurveType curve, + typename CurveType::ArcLengthCalculator arcLenCalc, + float phaseShift, + float stretch, + float worldToScreenRatio) + { + StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; + + // values for non-uniform stretching with a rigid segment + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + // rigidSegment info in old non stretched pattern + ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; + ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? 
style.getStippleValue(style.rigidSegmentIdx) : 1.0f; + ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; + // stretch value for non rigid segments + ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); + // rigidSegment info to new stretched pattern + ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start + ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len + ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end + } + else + { + ret.nonRigidSegmentStretchValue = stretch; + } + + return ret; + } + + // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes + // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern + float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) + { + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment + ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment + ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment + ret *= stretch; + return ret; + } + else + { + return normalizedPlaceInPattern; + } + } + + float_t2 operator()(float_t t) + { + // basicaly 0.0 and 1.0 but with a guardband to discard outside the range + const float_t minT = 0.0 - 1.0; + const float_t maxT = 1.0 + 1.0; + + StyleAccessor styleAccessor = { style }; + const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; + const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * 
style.reciprocalStipplePatternLen); + + const float_t arcLen = arcLenCalc.calcArcLen(t); + const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); + float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); + uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); + + const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; + float_t2 ret = float_t2(InvalidT, InvalidT); + + // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections + const bool notInDrawSection = patternIdx & 0x1; + + // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections + float_t minDrawT = 0.0; + float_t maxDrawT = 1.0; + { + float_t normalizedPlaceInPatternBegin = frac(phaseShift); + normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); + uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); + const bool BeginInNonDrawSection = patternIdxBegin & 0x1; + + if (BeginInNonDrawSection) + { + float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; + diffToRightDrawableSection -= normalizedPlaceInPatternBegin; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? 
nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; + minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); + } + + // Completely in non-draw section -> clip away: + if (minDrawT >= 1.0) + return ret; + + const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); + const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); + float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); + uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); + const bool EndInNonDrawSection = patternIdxEnd & 0x1; + + if (EndInNonDrawSection) + { + float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; + maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); + } + } + + if (notInDrawSection) + { + float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + + float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; + const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; + float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); + t0 = clamp(t0, minDrawT, maxDrawT); + + float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; + diffToRightDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; + const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; + float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); + t1 = clamp(t1, minDrawT, maxDrawT); + + ret = float_t2(t0, t1); + } + else + { + t = clamp(t, minDrawT, maxDrawT); + ret = float_t2(t, t); + } + + return ret; + } + + LineStyle style; + CurveType curve; + typename CurveType::ArcLengthCalculator arcLenCalc; + float phaseShift; + float stretch; + float worldToScreenRatio; + // precomp value for non uniform stretching + float rigidSegmentStart; + float rigidSegmentEnd; + float rigidSegmentLen; + float nonRigidSegmentStretchValue; +}; + +typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; +typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; + +template +struct DefaultClipper +{ + using float_t2 = vector; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; + + static DefaultClipper construct() + { + DefaultClipper ret; + return ret; + } + + inline float_t2 operator()(const float_t t) + { + const float_t ret = clamp(t, 0.0, 1.0); + return float_t2(ret, ret); + } +}; + +template > +struct ClippedSignedDistance +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + + const static float_t sdf(CurveType 
curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) + { + typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); + + const float_t InvalidT = nbl::hlsl::numeric_limits::max; + // TODO: Fix and test, we're not working with squared distance anymore + const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? + + bool clipped = false; + float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; + float_t closestT = InvalidT; + [[unroll(CurveType::MaxCandidates)]] + for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) + { + const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); + if (candidateDistanceSquared < closestDistanceSquared) + { + float_t2 snappedTs = clipper(candidates[i]); + + if (snappedTs[0] == InvalidT) + { + continue; + } + + if (snappedTs[0] != candidates[i]) + { + // left snapped or clamped + const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); + if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[0]; + closestDistanceSquared = leftSnappedCandidateDistanceSquared; + } + + if (snappedTs[0] != snappedTs[1]) + { + // right snapped or clamped + const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); + if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[1]; + closestDistanceSquared = rightSnappedCandidateDistanceSquared; + } + } + } + else + { + // no snapping + if (candidateDistanceSquared < closestDistanceSquared) + { + clipped = false; + closestT = candidates[i]; + closestDistanceSquared = candidateDistanceSquared; + } + } + } + } + + + float_t roundedDistance = closestDistanceSquared - thickness; + if (!isRoadStyle) + { + return roundedDistance; + } + else + { + const float_t aaWidth = 
globals.antiAliasingFactor; + float_t rectCappedDistance = roundedDistance; + + if (clipped) + { + float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); + rectCappedDistance = capSquare(q, thickness, aaWidth); + } + + return rectCappedDistance; + } + } + + static float capSquare(float_t2 q, float_t th, float_t aaWidth) + { + float_t2 d = abs(q) - float_t2(aaWidth, th); + return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); + } +}; + +#endif \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl index 46c5d28e0..69bab6bde 100644 --- a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl +++ b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl @@ -1,6 +1,5 @@ #include "common.hlsl" #include -#include template float32_t4 calculateFinalColor(const uint2 fragCoord); @@ -16,36 +15,59 @@ template<> float32_t4 calculateFinalColor(const uint2 fragCoord) { float32_t4 color; - - nbl::hlsl::spirv::beginInvocationInterlockEXT(); + nbl::hlsl::spirv::beginInvocationInterlockEXT(); + + bool resolve = false; + uint32_t toResolveStyleIdx = InvalidStyleIdx; const uint32_t packedData = pseudoStencil[fragCoord]; const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits); const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits); - pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(0, InvalidMainObjectIdx, AlphaBits, MainObjectIdxBits); - // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate - const bool resolve = storedMainObjectIdx != InvalidMainObjectIdx; - uint32_t toResolveStyleIdx = InvalidStyleIdx; + const bool currentlyActiveMainObj = (storedMainObjectIdx == globals.currentlyActiveMainObjectIndex); + if (!currentlyActiveMainObj) + { + // Normal Scenario, this branch will always be taken if there is no overflow submit in the middle of an 
active mainObject + //we do the final resolve of the pixel and invalidate the pseudo-stencil + pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(0, InvalidMainObjectIdx, AlphaBits, MainObjectIdxBits); + + // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate + resolve = storedMainObjectIdx != InvalidMainObjectIdx; - // load from colorStorage only if we want to resolve color from texture instead of style - // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store - if (resolve) + // load from colorStorage only if we want to resolve color from texture instead of style + // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store + if (resolve) + { + toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx; + if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color + color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); + } + } + else if (globals.currentlyActiveMainObjectIndex != InvalidMainObjectIdx) { - toResolveStyleIdx = mainObjects[storedMainObjectIdx].styleIdx; - if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color - color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); + // Being here means there was an overflow submit in the middle of an active main object + // We don't want to resolve the active mainObj, because it needs to be fully resolved later when the mainObject actually finishes. + // We change the active main object index in our pseudo-stencil to 0u, because that will be its new index in the next submit. 
+ uint32_t newMainObjectIdx = 0u; + pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(storedQuantizedAlpha, newMainObjectIdx, AlphaBits, MainObjectIdxBits); + resolve = false; // just to re-iterate that we don't want to resolve this. } + nbl::hlsl::spirv::endInvocationInterlockEXT(); if (!resolve) discard; + // draw with previous geometry's style's color or stored in texture buffer :kek: // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style - color = lineStyles[toResolveStyleIdx].color; + { + color = loadLineStyle(toResolveStyleIdx).color; + gammaUncorrect(color.rgb); // want to output to SRGB without gamma correction + } + color.a *= float(storedQuantizedAlpha) / 255.f; return color; @@ -55,5 +77,5 @@ float32_t4 calculateFinalColor(const uint2 fragCoord) [shader("pixel")] float4 resolveAlphaMain(float4 position : SV_Position) : SV_TARGET { - return calculateFinalColor(position.xy); + return calculateFinalColor(position.xy); } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index bff4182f6..fd327e7fd 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -5,7 +5,6 @@ #include #include #include -#include // TODO[Lucas]: Move these functions to builtin hlsl functions (Even the shadertoy obb and aabb ones) float cross2D(float2 a, float2 b) @@ -23,21 +22,43 @@ float2 QuadraticBezier(float2 p0, float2 p1, float2 p2, float t) return shapes::QuadraticBezier::construct(p0, p1, p2).evaluate(t); } -ClipProjectionData getClipProjectionData(in MainObject mainObj) +struct NDCClipProjectionData { - if (mainObj.clipProjectionAddress != InvalidClipProjectionAddress) + pfloat64_t3x3 projectionToNDC; // pre-multiplied projection in a 
tree + float32_t2 minClipNDC; + float32_t2 maxClipNDC; +}; + +NDCClipProjectionData getClipProjectionData(in MainObject mainObj) +{ + NDCClipProjectionData ret; + if (mainObj.customProjectionIndex != InvalidCustomProjectionIndex) { - ClipProjectionData ret; - ret.projectionToNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress, 8u); - ret.minClipNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress + sizeof(pfloat64_t3x3), 8u); - ret.maxClipNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress + sizeof(pfloat64_t3x3) + sizeof(float32_t2), 8u); + // If projection type is worldspace projection and clip: + pfloat64_t3x3 customProjection = loadCustomProjection(mainObj.customProjectionIndex); + ret.projectionToNDC = nbl::hlsl::mul(globals.defaultProjectionToNDC, customProjection); + } + else + ret.projectionToNDC = globals.defaultProjectionToNDC; - return ret; + if (mainObj.customClipRectIndex != InvalidCustomClipRectIndex) + { + WorldClipRect worldClipRect = loadCustomClipRect(mainObj.customClipRectIndex); + + /// [NOTE]: Optimization: we avoid looking for min/max in the shader because minClip and maxClip in default worldspace are defined in such a way that minClip.y > maxClip.y so minClipNDC.y < maxClipNDC.y + ret.minClipNDC = nbl::hlsl::_static_cast(transformPointNdc(globals.defaultProjectionToNDC, worldClipRect.minClip)); + ret.maxClipNDC = nbl::hlsl::_static_cast(transformPointNdc(globals.defaultProjectionToNDC, worldClipRect.maxClip)); } else { - return globals.defaultClipProjection; + ret.minClipNDC = float2(-1.0f, -1.0f); + ret.maxClipNDC = float2(+1.0f, +1.0f); } + + if (mainObj.transformationType == TransformationType::TT_FIXED_SCREENSPACE_SIZE) + ret.projectionToNDC = nbl::hlsl::mul(ret.projectionToNDC, globals.screenToWorldScaleTransform); + + return ret; } float2 transformPointScreenSpace(pfloat64_t3x3 transformation, uint32_t2 resolution, pfloat64_t2 point2d) @@ -87,18 +108,8 @@ void dilateHatch(out float2 outOffsetVec, out float2 outUV, const float2 
PSInput main(uint vertexID : SV_VertexID) { - // TODO[Przemek]: Disable Everything here and do your own thing as we already discussed, but let's have the same PSInput data passed to fragment. - // your programmable pulling will use the baseVertexBufferAddress BDA address and `vertexID` to RawBufferLoad it's vertex. - // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ - // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. - - const uint vertexIdx = vertexID & 0x3u; - const uint objectID = vertexID >> 2; - - DrawObject drawObj = drawObjects[objectID]; - - ObjectType objType = (ObjectType)(drawObj.type_subsectionIdx & 0x0000FFFF); - uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; + NDCClipProjectionData clipProjectionData; + PSInput outV; // Default Initialize PS Input @@ -107,486 +118,644 @@ PSInput main(uint vertexID : SV_VertexID) outV.data2 = float4(0, 0, 0, 0); outV.data3 = float4(0, 0, 0, 0); outV.data4 = float4(0, 0, 0, 0); - outV.interp_data5 = float2(0, 0); - outV.setObjType(objType); - outV.setMainObjectIdx(drawObj.mainObjIndex); - - MainObject mainObj = mainObjects[drawObj.mainObjIndex]; - ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); - - // We only need these for Outline type objects like lines and bezier curves - if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) + outV.interp_data5 = float4(0, 0, 0, 0); + + if (pc.isDTMRendering) { - LineStyle lineStyle = lineStyles[mainObj.styleIdx]; - - // Width is on both sides, thickness is one one side of the curve (div by 2.0f) - const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + 
_static_cast(_static_cast(lineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; - const float sdfLineThickness = screenSpaceLineWidth / 2.0f; - outV.setLineThickness(sdfLineThickness); - outV.setCurrentWorldToScreenRatio( - _static_cast((_static_cast(2.0f) / - (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) - ); - - if (objType == ObjectType::LINE) + outV.setObjType(ObjectType::TRIANGLE_MESH); + outV.setMainObjectIdx(pc.triangleMeshMainObjectIndex); + + TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); + + MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); + clipProjectionData = getClipProjectionData(mainObj); + + // assuming there are 3 * N vertices, number of vertices is equal to number of indices and indices are sequential starting from 0 + float2 transformedOriginalPos; + float2 transformedDilatedPos; { - pfloat64_t2 points[2u]; - points[0u] = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - points[1u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(LinePointInfo), 8u); + uint32_t firstVertexOfCurrentTriangleIndex = vertexID - vertexID % 3; + uint32_t currentVertexWithinTriangleIndex = vertexID - firstVertexOfCurrentTriangleIndex; + + TriangleMeshVertex triangleVertices[3]; + triangleVertices[0] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * firstVertexOfCurrentTriangleIndex, 8u); + triangleVertices[1] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 1), 8u); + triangleVertices[2] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 2), 8u); + transformedOriginalPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, 
triangleVertices[currentVertexWithinTriangleIndex].pos); + + pfloat64_t2 triangleCentroid; + triangleCentroid.x = (triangleVertices[0].pos.x + triangleVertices[1].pos.x + triangleVertices[2].pos.x) / _static_cast(3.0f); + triangleCentroid.y = (triangleVertices[0].pos.y + triangleVertices[1].pos.y + triangleVertices[2].pos.y) / _static_cast(3.0f); + + // move triangles to local space, with centroid at (0, 0) + triangleVertices[0].pos = triangleVertices[0].pos - triangleCentroid; + triangleVertices[1].pos = triangleVertices[1].pos - triangleCentroid; + triangleVertices[2].pos = triangleVertices[2].pos - triangleCentroid; + + // TODO: calculate dialation factor + // const float dilateByPixels = 0.5 * (dtmSettings.maxScreenSpaceLineWidth + dtmSettings.maxWorldSpaceLineWidth * globals.screenToWorldRatio) + aaFactor; + + pfloat64_t dialationFactor = _static_cast(2.0f); + pfloat64_t2 dialatedVertex = triangleVertices[currentVertexWithinTriangleIndex].pos * dialationFactor; - const float phaseShift = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float patternStretch = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float), 8u); - outV.setCurrentPhaseShift(phaseShift); - outV.setPatternStretch(patternStretch); + dialatedVertex = dialatedVertex + triangleCentroid; - float2 transformedPoints[2u]; - for (uint i = 0u; i < 2u; ++i) - { - transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); - } + transformedDilatedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, dialatedVertex); + } - const float2 lineVector = normalize(transformedPoints[1u] - transformedPoints[0u]); - const float2 normalToLine = float2(-lineVector.y, lineVector.x); + outV.position = transformFromSreenSpaceToNdc(transformedDilatedPos, globals.resolution); + const float heightAsFloat = nbl::hlsl::_static_cast(vtx.height); + 
outV.setScreenSpaceVertexAttribs(float3(transformedOriginalPos, heightAsFloat)); - if (vertexIdx == 0u || vertexIdx == 1u) - { - // work in screen space coordinates because of fixed pixel size - outV.position.xy = transformedPoints[0u] - + normalToLine * (((float)vertexIdx - 0.5f) * 2.0f * antiAliasedLineThickness) - - lineVector * antiAliasedLineThickness; - } - else // if (vertexIdx == 2u || vertexIdx == 3u) - { - // work in screen space coordinates because of fixed pixel size - outV.position.xy = transformedPoints[1u] - + normalToLine * (((float)vertexIdx - 2.5f) * 2.0f * antiAliasedLineThickness) - + lineVector * antiAliasedLineThickness; - } + // full screen triangle (this will destroy outline, contour line and height drawing) +#if 0 + const uint vertexIdx = vertexID % 3; + if(vertexIdx == 0) + outV.position.xy = float2(-1.0f, -1.0f); + else if (vertexIdx == 1) + outV.position.xy = float2(-1.0f, 3.0f); + else if (vertexIdx == 2) + outV.position.xy = float2(3.0f, -1.0f); +#endif + } + else + { + const uint vertexIdx = vertexID & 0x3u; + const uint objectID = vertexID >> 2; - outV.setLineStart(transformedPoints[0u]); - outV.setLineEnd(transformedPoints[1u]); + DrawObject drawObj = loadDrawObject(objectID); - outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; - } - else if (objType == ObjectType::QUAD_BEZIER) + ObjectType objType = (ObjectType)(drawObj.type_subsectionIdx & 0x0000FFFF); + uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; + outV.setObjType(objType); + outV.setMainObjectIdx(drawObj.mainObjIndex); + + MainObject mainObj = loadMainObject(drawObj.mainObjIndex); + clipProjectionData = getClipProjectionData(mainObj); + + // We only need these for Outline type objects like lines and bezier curves + if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) { - pfloat64_t2 points[3u]; - points[0u] = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - 
points[1u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - points[2u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); - - const float phaseShift = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); - const float patternStretch = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); - outV.setCurrentPhaseShift(phaseShift); - outV.setPatternStretch(patternStretch); - - // transform these points into screen space and pass to fragment - float2 transformedPoints[3u]; - for (uint i = 0u; i < 3u; ++i) - { - transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); - } + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); - shapes::QuadraticBezier quadraticBezier = shapes::QuadraticBezier::construct(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u]); - shapes::Quadratic quadratic = shapes::Quadratic::constructFromBezier(quadraticBezier); - shapes::Quadratic::ArcLengthCalculator preCompData = shapes::Quadratic::ArcLengthCalculator::construct(quadratic); - - outV.setQuadratic(quadratic); - outV.setQuadraticPrecomputedArcLenData(preCompData); - - float2 Mid = (transformedPoints[0u] + transformedPoints[2u]) / 2.0f; - float Radius = length(Mid - transformedPoints[0u]) / 2.0f; - - // https://algorithmist.wordpress.com/2010/12/01/quad-bezier-curvature/ - float2 vectorAB = transformedPoints[1u] - transformedPoints[0u]; - float2 vectorAC = transformedPoints[2u] - transformedPoints[1u]; - float area = abs(vectorAB.x * vectorAC.y - vectorAB.y * vectorAC.x) * 0.5; - float MaxCurvature; - if (length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.25f)) > Radius && length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.75f)) > Radius) - MaxCurvature = pow(length(transformedPoints[1u] - Mid), 3) / (area * area); - else - MaxCurvature = 
max(area / pow(length(transformedPoints[0u] - transformedPoints[1u]), 3), area / pow(length(transformedPoints[2u] - transformedPoints[1u]), 3)); - - // We only do this adaptive thing when "MinRadiusOfOsculatingCircle = RadiusOfMaxCurvature < screenSpaceLineWidth/4" OR "MaxCurvature > 4/screenSpaceLineWidth"; - // which means there is a self intersection because of large lineWidth relative to the curvature (in screenspace) - // the reason for division by 4.0f is 1. screenSpaceLineWidth is expanded on both sides and 2. the fact that diameter/2=radius, - const bool noCurvature = abs(dot(normalize(vectorAB), normalize(vectorAC)) - 1.0f) < exp2(-10.0f); - if (MaxCurvature * screenSpaceLineWidth > 4.0f || noCurvature) - { - //OBB Fallback - float2 obbV0; - float2 obbV1; - float2 obbV2; - float2 obbV3; - quadraticBezier.computeOBB(antiAliasedLineThickness, obbV0, obbV1, obbV2, obbV3); - if (subsectionIdx == 0) - { - if (vertexIdx == 0u) - outV.position = float4(obbV0, 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(obbV1, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(obbV3, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(obbV2, 0.0, 1.0f); - } - else - outV.position = float4(0.0f, 0.0f, 0.0f, 0.0f); - } - else + // Width is on both sides, thickness is one one side of the curve (div by 2.0f) + const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + lineStyle.worldSpaceLineWidth * globals.screenToWorldRatio; + const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; + const float sdfLineThickness = screenSpaceLineWidth / 2.0f; + outV.setLineThickness(sdfLineThickness); + + if (objType == ObjectType::LINE) { - // this optimal value is hardcoded based on tests and benchmarks of pixel shader invocation - // this is the place where we use it's tangent in the bezier to form sides the cages - const float optimalT = 0.145f; + pfloat64_t2 points[2u]; + points[0u] = 
vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(LinePointInfo), 8u); - // Whether or not to flip the the interior cage nodes - int flip = cross2D(transformedPoints[0u] - transformedPoints[1u], transformedPoints[2u] - transformedPoints[1u]) > 0.0f ? -1 : 1; + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float), 8u); + outV.setCurrentPhaseShift(phaseShift); + outV.setPatternStretch(patternStretch); - const float middleT = 0.5f; - float2 midPos = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT); - float2 midTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT)); - float2 midNormal = float2(-midTangent.y, midTangent.x) * flip; + float2 transformedPoints[2u]; + for (uint i = 0u; i < 2u; ++i) + { + transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); + } - /* - P1 - + + const float2 lineVector = normalize(transformedPoints[1u] - transformedPoints[0u]); + const float2 normalToLine = float2(-lineVector.y, lineVector.x); + if (vertexIdx == 0u || vertexIdx == 1u) + { + // work in screen space coordinates because of fixed pixel size + outV.position.xy = transformedPoints[0u] + + normalToLine * (((float)vertexIdx - 0.5f) * 2.0f * antiAliasedLineThickness) + - lineVector * antiAliasedLineThickness; + } + else // if (vertexIdx == 2u || vertexIdx == 3u) + { + // work in screen space coordinates because of fixed pixel size + outV.position.xy = transformedPoints[1u] + + normalToLine * (((float)vertexIdx - 2.5f) * 2.0f * antiAliasedLineThickness) + + lineVector * 
antiAliasedLineThickness; + } - exterior0 exterior1 - ---------------------- - / \- - -/ ---------------- \ - / -/interior0 interior1 - / / \ \- - -/ -/ \- \ - / -/ \ \- - / / \- \ - P0 + \ + P2 - */ + outV.setLineStart(transformedPoints[0u]); + outV.setLineEnd(transformedPoints[1u]); - // Internal cage points - float2 interior0; - float2 interior1; + outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; + } + else if (objType == ObjectType::QUAD_BEZIER) + { + pfloat64_t2 points[3u]; + points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + points[2u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); + + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); + outV.setCurrentPhaseShift(phaseShift); + outV.setPatternStretch(patternStretch); + + // transform these points into screen space and pass to fragment + float2 transformedPoints[3u]; + for (uint i = 0u; i < 3u; ++i) + { + transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); + } - float2 middleExteriorPoint = midPos - midNormal * antiAliasedLineThickness; + shapes::QuadraticBezier quadraticBezier = shapes::QuadraticBezier::construct(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u]); + shapes::Quadratic quadratic = shapes::Quadratic::constructFromBezier(quadraticBezier); + shapes::Quadratic::ArcLengthCalculator preCompData = shapes::Quadratic::ArcLengthCalculator::construct(quadratic); + outV.setQuadratic(quadratic); + 
outV.setQuadraticPrecomputedArcLenData(preCompData); - float2 leftTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT)); - float2 leftNormal = normalize(float2(-leftTangent.y, leftTangent.x)) * flip; - float2 leftExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT) - leftNormal * antiAliasedLineThickness; - float2 exterior0 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, leftExteriorPoint, leftTangent); + float2 Mid = (transformedPoints[0u] + transformedPoints[2u]) / 2.0f; + float Radius = length(Mid - transformedPoints[0u]) / 2.0f; - float2 rightTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT)); - float2 rightNormal = normalize(float2(-rightTangent.y, rightTangent.x)) * flip; - float2 rightExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT) - rightNormal * antiAliasedLineThickness; - float2 exterior1 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, rightExteriorPoint, rightTangent); + // https://algorithmist.wordpress.com/2010/12/01/quad-bezier-curvature/ + float2 vectorAB = transformedPoints[1u] - transformedPoints[0u]; + float2 vectorAC = transformedPoints[2u] - transformedPoints[1u]; + float area = abs(vectorAB.x * vectorAC.y - vectorAB.y * vectorAC.x) * 0.5; + float MaxCurvature; + if (length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.25f)) > Radius && length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.75f)) > Radius) + MaxCurvature = pow(length(transformedPoints[1u] - Mid), 3) / (area * area); + else + MaxCurvature = max(area / pow(length(transformedPoints[0u] - transformedPoints[1u]), 3), area / pow(length(transformedPoints[2u] - transformedPoints[1u]), 3)); - // Interiors + // We only do this 
adaptive thing when "MinRadiusOfOsculatingCircle = RadiusOfMaxCurvature < screenSpaceLineWidth/4" OR "MaxCurvature > 4/screenSpaceLineWidth"; + // which means there is a self intersection because of large lineWidth relative to the curvature (in screenspace) + // the reason for division by 4.0f is 1. screenSpaceLineWidth is expanded on both sides and 2. the fact that diameter/2=radius, + const bool noCurvature = abs(dot(normalize(vectorAB), normalize(vectorAC)) - 1.0f) < exp2(-10.0f); + if (MaxCurvature * screenSpaceLineWidth > 4.0f || noCurvature) { - float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286f)); - float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; - interior0 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286) + normal * antiAliasedLineThickness; + //OBB Fallback + float2 obbV0; + float2 obbV1; + float2 obbV2; + float2 obbV3; + quadraticBezier.computeOBB(antiAliasedLineThickness, obbV0, obbV1, obbV2, obbV3); + if (subsectionIdx == 0) + { + if (vertexIdx == 0u) + outV.position = float4(obbV0, 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(obbV1, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(obbV3, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(obbV2, 0.0, 1.0f); + } + else + outV.position = float4(0.0f, 0.0f, 0.0f, 0.0f); } + else { - float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f)); - float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; - interior1 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f) + normal * antiAliasedLineThickness; + // this optimal value is hardcoded based on tests and benchmarks of pixel shader invocation + // this is the place where we use it's tangent in the bezier to form sides the cages + const float optimalT = 0.145f; + + // Whether 
or not to flip the the interior cage nodes + int flip = cross2D(transformedPoints[0u] - transformedPoints[1u], transformedPoints[2u] - transformedPoints[1u]) > 0.0f ? -1 : 1; + + const float middleT = 0.5f; + float2 midPos = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT); + float2 midTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT)); + float2 midNormal = float2(-midTangent.y, midTangent.x) * flip; + + /* + P1 + + + + + exterior0 exterior1 + ---------------------- + / \- + -/ ---------------- \ + / -/interior0 interior1 + / / \ \- + -/ -/ \- \ + / -/ \ \- + / / \- \ + P0 + \ + P2 + */ + + // Internal cage points + float2 interior0; + float2 interior1; + + float2 middleExteriorPoint = midPos - midNormal * antiAliasedLineThickness; + + + float2 leftTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT)); + float2 leftNormal = normalize(float2(-leftTangent.y, leftTangent.x)) * flip; + float2 leftExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT) - leftNormal * antiAliasedLineThickness; + float2 exterior0 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, leftExteriorPoint, leftTangent); + + float2 rightTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT)); + float2 rightNormal = normalize(float2(-rightTangent.y, rightTangent.x)) * flip; + float2 rightExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT) - rightNormal * antiAliasedLineThickness; + float2 exterior1 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, rightExteriorPoint, rightTangent); + + // Interiors + { + float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 
0.286f)); + float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; + interior0 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286) + normal * antiAliasedLineThickness; + } + { + float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f)); + float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; + interior1 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f) + normal * antiAliasedLineThickness; + } + + if (subsectionIdx == 0u) + { + float2 endPointTangent = normalize(transformedPoints[1u] - transformedPoints[0u]); + float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; + float2 endPointExterior = transformedPoints[0u] - endPointTangent * antiAliasedLineThickness; + + if (vertexIdx == 0u) + outV.position = float4(shapes::util::LineLineIntersection(leftExteriorPoint, leftTangent, endPointExterior, endPointNormal), 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(transformedPoints[0u] + endPointNormal * antiAliasedLineThickness - endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior0, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior0, 0.0, 1.0f); + } + else if (subsectionIdx == 1u) + { + if (vertexIdx == 0u) + outV.position = float4(exterior0, 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(interior0, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior1, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior1, 0.0, 1.0f); + } + else if (subsectionIdx == 2u) + { + float2 endPointTangent = normalize(transformedPoints[2u] - transformedPoints[1u]); + float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; + float2 endPointExterior = transformedPoints[2u] + endPointTangent * antiAliasedLineThickness; + + if 
(vertexIdx == 0u) + outV.position = float4(shapes::util::LineLineIntersection(rightExteriorPoint, rightTangent, endPointExterior, endPointNormal), 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(transformedPoints[2u] + endPointNormal * antiAliasedLineThickness + endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior1, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior1, 0.0, 1.0f); + } } - if (subsectionIdx == 0u) - { - float2 endPointTangent = normalize(transformedPoints[1u] - transformedPoints[0u]); - float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; - float2 endPointExterior = transformedPoints[0u] - endPointTangent * antiAliasedLineThickness; + outV.position.xy = (outV.position.xy / globals.resolution) * 2.0f - 1.0f; + } + else if (objType == ObjectType::POLYLINE_CONNECTOR) + { + const float FLOAT_INF = numeric_limits::infinity; + const float4 INVALID_VERTEX = float4(FLOAT_INF, FLOAT_INF, FLOAT_INF, FLOAT_INF); - if (vertexIdx == 0u) - outV.position = float4(shapes::util::LineLineIntersection(leftExteriorPoint, leftTangent, endPointExterior, endPointNormal), 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(transformedPoints[0u] + endPointNormal * antiAliasedLineThickness - endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior0, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior0, 0.0, 1.0f); - } - else if (subsectionIdx == 1u) - { - if (vertexIdx == 0u) - outV.position = float4(exterior0, 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(interior0, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior1, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior1, 0.0, 1.0f); - } - else if (subsectionIdx == 2u) + if (lineStyle.isRoadStyleFlag) { - float2 endPointTangent = 
normalize(transformedPoints[2u] - transformedPoints[1u]); - float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; - float2 endPointExterior = transformedPoints[2u] + endPointTangent * antiAliasedLineThickness; + const pfloat64_t2 circleCenter = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + const float2 v = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float cosHalfAngleBetweenNormals = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); - if (vertexIdx == 0u) - outV.position = float4(shapes::util::LineLineIntersection(rightExteriorPoint, rightTangent, endPointExterior, endPointNormal), 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(transformedPoints[2u] + endPointNormal * antiAliasedLineThickness + endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior1, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior1, 0.0, 1.0f); - } - } + const float2 circleCenterScreenSpace = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, circleCenter); + outV.setPolylineConnectorCircleCenter(circleCenterScreenSpace); - outV.position.xy = (outV.position.xy / globals.resolution) * 2.0f - 1.0f; - } - else if (objType == ObjectType::POLYLINE_CONNECTOR) - { - const float FLOAT_INF = numeric_limits::infinity; - const float4 INVALID_VERTEX = float4(FLOAT_INF, FLOAT_INF, FLOAT_INF, FLOAT_INF); + // Find other miter vertices + const float sinHalfAngleBetweenNormals = sqrt(1.0f - (cosHalfAngleBetweenNormals * cosHalfAngleBetweenNormals)); + const float32_t2x2 rotationMatrix = float32_t2x2(cosHalfAngleBetweenNormals, -sinHalfAngleBetweenNormals, sinHalfAngleBetweenNormals, cosHalfAngleBetweenNormals); - if (lineStyle.isRoadStyleFlag) - { - const pfloat64_t2 circleCenter = 
vk::RawBufferLoad(drawObj.geometryAddress, 8u); - const float2 v = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float cosHalfAngleBetweenNormals = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); + // Pass the precomputed trapezoid values for the sdf + { + float vLen = length(v); + float2 intersectionDirection = v / vLen; - const float2 circleCenterScreenSpace = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, circleCenter); - outV.setPolylineConnectorCircleCenter(circleCenterScreenSpace); + float longBase = sinHalfAngleBetweenNormals; + float shortBase = max((vLen - globals.miterLimit) * cosHalfAngleBetweenNormals / sinHalfAngleBetweenNormals, 0.0); + // height of the trapezoid / triangle + float hLen = min(globals.miterLimit, vLen); - // Find other miter vertices - const float sinHalfAngleBetweenNormals = sqrt(1.0f - (cosHalfAngleBetweenNormals * cosHalfAngleBetweenNormals)); - const float32_t2x2 rotationMatrix = float32_t2x2(cosHalfAngleBetweenNormals, -sinHalfAngleBetweenNormals, sinHalfAngleBetweenNormals, cosHalfAngleBetweenNormals); + outV.setPolylineConnectorTrapezoidStart(-1.0 * intersectionDirection * sdfLineThickness); + outV.setPolylineConnectorTrapezoidEnd(intersectionDirection * hLen * sdfLineThickness); + outV.setPolylineConnectorTrapezoidLongBase(sinHalfAngleBetweenNormals * ((1.0 + vLen) / (vLen - cosHalfAngleBetweenNormals)) * sdfLineThickness); + outV.setPolylineConnectorTrapezoidShortBase(shortBase * sdfLineThickness); + } - // Pass the precomputed trapezoid values for the sdf - { - float vLen = length(v); - float2 intersectionDirection = v / vLen; - - float longBase = sinHalfAngleBetweenNormals; - float shortBase = max((vLen - globals.miterLimit) * cosHalfAngleBetweenNormals / sinHalfAngleBetweenNormals, 0.0); - // height of the trapezoid / triangle - float hLen = min(globals.miterLimit, vLen); - - outV.setPolylineConnectorTrapezoidStart(-1.0 
* intersectionDirection * sdfLineThickness); - outV.setPolylineConnectorTrapezoidEnd(intersectionDirection * hLen * sdfLineThickness); - outV.setPolylineConnectorTrapezoidLongBase(sinHalfAngleBetweenNormals * ((1.0 + vLen) / (vLen - cosHalfAngleBetweenNormals)) * sdfLineThickness); - outV.setPolylineConnectorTrapezoidShortBase(shortBase * sdfLineThickness); - } + if (vertexIdx == 0u) + { + const float2 V1 = normalize(mul(v, rotationMatrix)) * antiAliasedLineThickness * 2.0f; + const float2 screenSpaceV1 = circleCenterScreenSpace + V1; + outV.position = float4(screenSpaceV1, 0.0f, 1.0f); + } + else if (vertexIdx == 1u) + { + outV.position = float4(circleCenterScreenSpace, 0.0f, 1.0f); + } + else if (vertexIdx == 2u) + { + // find intersection point vertex + float2 intersectionPoint = v * antiAliasedLineThickness * 2.0f; + intersectionPoint += circleCenterScreenSpace; + outV.position = float4(intersectionPoint, 0.0f, 1.0f); + } + else if (vertexIdx == 3u) + { + const float2 V2 = normalize(mul(rotationMatrix, v)) * antiAliasedLineThickness * 2.0f; + const float2 screenSpaceV2 = circleCenterScreenSpace + V2; + outV.position = float4(screenSpaceV2, 0.0f, 1.0f); + } - if (vertexIdx == 0u) - { - const float2 V1 = normalize(mul(v, rotationMatrix)) * antiAliasedLineThickness * 2.0f; - const float2 screenSpaceV1 = circleCenterScreenSpace + V1; - outV.position = float4(screenSpaceV1, 0.0f, 1.0f); - } - else if (vertexIdx == 1u) - { - outV.position = float4(circleCenterScreenSpace, 0.0f, 1.0f); - } - else if (vertexIdx == 2u) - { - // find intersection point vertex - float2 intersectionPoint = v * antiAliasedLineThickness * 2.0f; - intersectionPoint += circleCenterScreenSpace; - outV.position = float4(intersectionPoint, 0.0f, 1.0f); + outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; } - else if (vertexIdx == 3u) + else { - const float2 V2 = normalize(mul(rotationMatrix, v)) * antiAliasedLineThickness * 2.0f; - const float2 screenSpaceV2 
= circleCenterScreenSpace + V2; - outV.position = float4(screenSpaceV2, 0.0f, 1.0f); + outV.position = INVALID_VERTEX; } - - outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; - } - else - { - outV.position = INVALID_VERTEX; } } - } - else if (objType == ObjectType::CURVE_BOX) - { - CurveBox curveBox; - curveBox.aabbMin = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - curveBox.aabbMax = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - - for (uint32_t i = 0; i < 3; i ++) + else if (objType == ObjectType::CURVE_BOX) { - curveBox.curveMin[i] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); - curveBox.curveMax[i] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); - } + CurveBox curveBox; + curveBox.aabbMin = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + curveBox.aabbMax = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + + for (uint32_t i = 0; i < 3; i ++) + { + curveBox.curveMin[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); + curveBox.curveMax[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); + } - pfloat64_t2 aabbMaxXMinY; - aabbMaxXMinY.x = curveBox.aabbMax.x; - aabbMaxXMinY.y = curveBox.aabbMin.y; + pfloat64_t2 aabbMaxXMinY; + aabbMaxXMinY.x = curveBox.aabbMax.x; + aabbMaxXMinY.y = curveBox.aabbMin.y; - pfloat64_t2 aabbMinXMaxY; - aabbMinXMaxY.x = curveBox.aabbMin.x; - aabbMinXMaxY.y = curveBox.aabbMax.y; + pfloat64_t2 aabbMinXMaxY; + aabbMinXMaxY.x = curveBox.aabbMin.x; + aabbMinXMaxY.y = curveBox.aabbMax.y; - const float2 ndcAxisU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMaxXMinY - 
curveBox.aabbMin)); - const float2 ndcAxisV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMinXMaxY - curveBox.aabbMin)); + const float2 ndcAxisU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMaxXMinY - curveBox.aabbMin)); + const float2 ndcAxisV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMinXMaxY - curveBox.aabbMin)); - const float2 screenSpaceAabbExtents = float2(length(ndcAxisU * float2(globals.resolution)) / 2.0, length(ndcAxisV * float2(globals.resolution)) / 2.0); + const float2 screenSpaceAabbExtents = float2(length(ndcAxisU * float2(globals.resolution)) / 2.0, length(ndcAxisV * float2(globals.resolution)) / 2.0); - // we could use something like this to compute screen space change over minor/major change and avoid ddx(minor), ddy(major) in frag shader (the code below doesn't account for rotation) - outV.setCurveBoxScreenSpaceSize(float2(screenSpaceAabbExtents)); + // we could use something like this to compute screen space change over minor/major change and avoid ddx(minor), ddy(major) in frag shader (the code below doesn't account for rotation) + outV.setCurveBoxScreenSpaceSize(float2(screenSpaceAabbExtents)); - const float2 undilatedCorner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - const pfloat64_t2 undilatedCornerF64 = _static_cast(undilatedCorner); + const float2 undilatedCorner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + const pfloat64_t2 undilatedCornerF64 = _static_cast(undilatedCorner); - // We don't dilate on AMD (= no fragShaderInterlock) - const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; - const float2 dilateRate = pixelsToIncreaseOnEachSide / screenSpaceAabbExtents; // float sufficient to hold the dilate rect? 
- float2 dilateVec; - float2 dilatedUV; - dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); + // We don't dilate on AMD (= no fragShaderInterlock) + const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; + const float2 dilateRate = pixelsToIncreaseOnEachSide / screenSpaceAabbExtents; // float sufficient to hold the dilate rect? + float2 dilateVec; + float2 dilatedUV; + dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); - // doing interpolation this way to ensure correct endpoints and 0 and 1, we can alternatively use branches to set current corner based on vertexIdx - const pfloat64_t2 currentCorner = curveBox.aabbMin * (_static_cast(float2(1.0f, 1.0f)) - undilatedCornerF64) + - curveBox.aabbMax * undilatedCornerF64; + // doing interpolation this way to ensure correct endpoints and 0 and 1, we can alternatively use branches to set current corner based on vertexIdx + const pfloat64_t2 currentCorner = curveBox.aabbMin * (_static_cast(float2(1.0f, 1.0f)) - undilatedCornerF64) + + curveBox.aabbMax * undilatedCornerF64; - const float2 coord = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, currentCorner) + _static_cast(dilateVec)); + const float2 coord = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, currentCorner) + _static_cast(dilateVec)); - outV.position = float4(coord, 0.f, 1.f); + outV.position = float4(coord, 0.f, 1.f); - const uint major = (uint)SelectedMajorAxis; - const uint minor = 1-major; - - // A, B & C get converted from unorm to [0, 1] - // A & B get converted from [0,1] to [-2, 2] - shapes::Quadratic curveMin = shapes::Quadratic::construct( - curveBox.curveMin[0], curveBox.curveMin[1], curveBox.curveMin[2]); - shapes::Quadratic curveMax = shapes::Quadratic::construct( - curveBox.curveMax[0], curveBox.curveMax[1], curveBox.curveMax[2]); - - outV.setMinorBBoxUV(dilatedUV[minor]); - outV.setMajorBBoxUV(dilatedUV[major]); - - 
outV.setCurveMinMinor(math::equations::Quadratic::construct( - curveMin.A[minor], - curveMin.B[minor], - curveMin.C[minor])); - outV.setCurveMinMajor(math::equations::Quadratic::construct( - curveMin.A[major], - curveMin.B[major], - curveMin.C[major])); - - outV.setCurveMaxMinor(math::equations::Quadratic::construct( - curveMax.A[minor], - curveMax.B[minor], - curveMax.C[minor])); - outV.setCurveMaxMajor(math::equations::Quadratic::construct( - curveMax.A[major], - curveMax.B[major], - curveMax.C[major])); - - //math::equations::Quadratic curveMinRootFinding = math::equations::Quadratic::construct( - // curveMin.A[major], - // curveMin.B[major], - // curveMin.C[major] - maxCorner[major]); - //math::equations::Quadratic curveMaxRootFinding = math::equations::Quadratic::construct( - // curveMax.A[major], - // curveMax.B[major], - // curveMax.C[major] - maxCorner[major]); - //outV.setMinCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMinRootFinding)); - //outV.setMaxCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMaxRootFinding)); - } - else if (objType == ObjectType::FONT_GLYPH) - { - LineStyle lineStyle = lineStyles[mainObj.styleIdx]; - const float italicTiltSlope = lineStyle.screenSpaceLineWidth; // aliased text style member with line style + const uint major = (uint)SelectedMajorAxis; + const uint minor = 1-major; + + // A, B & C get converted from unorm to [0, 1] + // A & B get converted from [0,1] to [-2, 2] + shapes::Quadratic curveMin = shapes::Quadratic::construct( + curveBox.curveMin[0], curveBox.curveMin[1], curveBox.curveMin[2]); + shapes::Quadratic curveMax = shapes::Quadratic::construct( + curveBox.curveMax[0], curveBox.curveMax[1], curveBox.curveMax[2]); + + outV.setMinorBBoxUV(dilatedUV[minor]); + outV.setMajorBBoxUV(dilatedUV[major]); + + outV.setCurveMinMinor(math::equations::Quadratic::construct( + curveMin.A[minor], + curveMin.B[minor], + curveMin.C[minor])); + 
outV.setCurveMinMajor(math::equations::Quadratic::construct( + curveMin.A[major], + curveMin.B[major], + curveMin.C[major])); + + outV.setCurveMaxMinor(math::equations::Quadratic::construct( + curveMax.A[minor], + curveMax.B[minor], + curveMax.C[minor])); + outV.setCurveMaxMajor(math::equations::Quadratic::construct( + curveMax.A[major], + curveMax.B[major], + curveMax.C[major])); + + //math::equations::Quadratic curveMinRootFinding = math::equations::Quadratic::construct( + // curveMin.A[major], + // curveMin.B[major], + // curveMin.C[major] - maxCorner[major]); + //math::equations::Quadratic curveMaxRootFinding = math::equations::Quadratic::construct( + // curveMax.A[major], + // curveMax.B[major], + // curveMax.C[major] - maxCorner[major]); + //outV.setMinCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMinRootFinding)); + //outV.setMaxCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMaxRootFinding)); + } + else if (objType == ObjectType::FONT_GLYPH) + { + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); + const float italicTiltSlope = lineStyle.screenSpaceLineWidth; // aliased text style member with line style - GlyphInfo glyphInfo; - glyphInfo.topLeft = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - glyphInfo.dirU = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - glyphInfo.aspectRatio = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); - - float32_t2 minUV = glyphInfo.getMinUV(); - uint16_t textureID = glyphInfo.getTextureID(); - - const float32_t2 dirV = float32_t2(glyphInfo.dirU.y, -glyphInfo.dirU.x) * glyphInfo.aspectRatio; - const float2 screenTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, glyphInfo.topLeft)); - const float2 screenDirU = 
_static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(glyphInfo.dirU))); - const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) - const float2 undilatedCornerNDC = corner * 2.0 - 1.0; // corners of square from (-1, -1) to (1, 1) + GlyphInfo glyphInfo; + glyphInfo.topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + glyphInfo.dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + glyphInfo.aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + float32_t2 minUV = glyphInfo.getMinUV(); + uint16_t textureID = glyphInfo.getTextureID(); + + const float32_t2 dirV = float32_t2(glyphInfo.dirU.y, -glyphInfo.dirU.x) * glyphInfo.aspectRatio; + const float2 screenTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, glyphInfo.topLeft)); + const float2 screenDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(glyphInfo.dirU))); + const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) + const float2 undilatedCornerNDC = corner * 2.0 - 1.0; // corners of square from (-1, -1) to (1, 1) - const float2 screenSpaceAabbExtents = float2(length(screenDirU * float2(globals.resolution)) / 2.0, length(screenDirV * float2(globals.resolution)) / 2.0); - const float pixelsToIncreaseOnEachSide = 
globals.antiAliasingFactor + 1.0; - const float2 dilateRate = (pixelsToIncreaseOnEachSide / screenSpaceAabbExtents); + const float2 screenSpaceAabbExtents = float2(length(screenDirU * float2(globals.resolution)) / 2.0, length(screenDirV * float2(globals.resolution)) / 2.0); + const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; + const float2 dilateRate = (pixelsToIncreaseOnEachSide / screenSpaceAabbExtents); - const float2 vx = screenDirU * dilateRate.x; - const float2 vy = screenDirV * dilateRate.y; - const float2 offsetVec = vx * undilatedCornerNDC.x + vy * undilatedCornerNDC.y; - float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV + offsetVec; + const float2 vx = screenDirU * dilateRate.x; + const float2 vy = screenDirV * dilateRate.y; + const float2 offsetVec = vx * undilatedCornerNDC.x + vy * undilatedCornerNDC.y; + float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV + offsetVec; - if (corner.y == 0 && italicTiltSlope > 0.0f) - coord += normalize(screenDirU) * length(screenDirV) * italicTiltSlope * float(globals.resolution.y) / float(globals.resolution.x); + if (corner.y == 0 && italicTiltSlope > 0.0f) + coord += normalize(screenDirU) * length(screenDirV) * italicTiltSlope * float(globals.resolution.y) / float(globals.resolution.x); - // If aspect ratio of the dimensions and glyph inside the texture are the same then screenPxRangeX === screenPxRangeY - // but if the glyph box is stretched in any way then we won't get correct msdf - // in that case we need to take the max(screenPxRangeX, screenPxRangeY) to avoid blur due to underexaggerated distances - // We compute screenPxRange using the ratio of our screenspace extent to the texel space our glyph takes inside the texture - // Our glyph is centered inside the texture, so `maxUV = 1.0 - minUV` and `glyphTexelSize = (1.0-2.0*minUV) * MSDFSize - const float screenPxRangeX = screenSpaceAabbExtents.x / ((1.0 - 2.0 * minUV.x)); // division by 
MSDFSize happens after max - const float screenPxRangeY = screenSpaceAabbExtents.y / ((1.0 - 2.0 * minUV.y)); // division by MSDFSize happens after max - outV.setFontGlyphPxRange((max(max(screenPxRangeX, screenPxRangeY), 1.0) * MSDFPixelRangeHalf) / MSDFSize); // we premultuply by MSDFPixelRange/2.0, to avoid doing it in frag shader - - // In order to keep the shape scale constant with any dilation values: - // We compute the new dilated minUV that gets us minUV when interpolated on the previous undilated top left - const float2 topLeftInterpolationValue = (dilateRate/(1.0+2.0*dilateRate)); - const float2 dilatedMinUV = (topLeftInterpolationValue - minUV) / (2.0 * topLeftInterpolationValue - 1.0); - const float2 dilatedMaxUV = float2(1.0, 1.0) - dilatedMinUV; + // If aspect ratio of the dimensions and glyph inside the texture are the same then screenPxRangeX === screenPxRangeY + // but if the glyph box is stretched in any way then we won't get correct msdf + // in that case we need to take the max(screenPxRangeX, screenPxRangeY) to avoid blur due to underexaggerated distances + // We compute screenPxRange using the ratio of our screenspace extent to the texel space our glyph takes inside the texture + // Our glyph is centered inside the texture, so `maxUV = 1.0 - minUV` and `glyphTexelSize = (1.0-2.0*minUV) * MSDFSize + const float screenPxRangeX = screenSpaceAabbExtents.x / ((1.0 - 2.0 * minUV.x)); // division by MSDFSize happens after max + const float screenPxRangeY = screenSpaceAabbExtents.y / ((1.0 - 2.0 * minUV.y)); // division by MSDFSize happens after max + outV.setFontGlyphPxRange((max(max(screenPxRangeX, screenPxRangeY), 1.0) * MSDFPixelRangeHalf) / MSDFSize); // we premultuply by MSDFPixelRange/2.0, to avoid doing it in frag shader + + // In order to keep the shape scale constant with any dilation values: + // We compute the new dilated minUV that gets us minUV when interpolated on the previous undilated top left + const float2 topLeftInterpolationValue 
= (dilateRate/(1.0+2.0*dilateRate)); + const float2 dilatedMinUV = (topLeftInterpolationValue - minUV) / (2.0 * topLeftInterpolationValue - 1.0); + const float2 dilatedMaxUV = float2(1.0, 1.0) - dilatedMinUV; - const float2 uv = dilatedMinUV + corner * (dilatedMaxUV - dilatedMinUV); + const float2 uv = dilatedMinUV + corner * (dilatedMaxUV - dilatedMinUV); - outV.position = float4(coord, 0.f, 1.f); - outV.setFontGlyphUV(uv); - outV.setFontGlyphTextureId(textureID); - } - else if (objType == ObjectType::IMAGE) - { - pfloat64_t2 topLeft = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - float32_t2 dirU = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - float32_t aspectRatio = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - uint32_t textureID = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); - - const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; - const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); - const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); - const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - float2 uv = corner; // non-dilated + outV.position = float4(coord, 0.f, 1.f); + outV.setFontGlyphUV(uv); + outV.setFontGlyphTextureId(textureID); + } + else if (objType == ObjectType::STATIC_IMAGE) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + uint32_t textureID = 
vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; + const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); + const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); + const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + float2 uv = corner; // non-dilated - float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; - outV.position = float4(ndcCorner, 0.f, 1.f); - outV.setImageUV(uv); - outV.setImageTextureId(textureID); - } + outV.position = float4(ndcCorner, 0.f, 1.f); + outV.setImageUV(uv); + outV.setImageTextureId(textureID); + } + else if (objType == ObjectType::GRID_DTM) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + pfloat64_t2 worldSpaceExtents = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2), 8u); + float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t), 8u); + float thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + sizeof(float), 8u); + + // test large dilation + //thicknessOfTheThickestLine += 200.0f; + + const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y); + + pfloat64_t2 
vtxPos = topLeft; + vtxPos.x = vtxPos.x + worldSpaceExtents.x * corner.x; + vtxPos.y = vtxPos.y + worldSpaceExtents.y * corner.y; + worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y); + + outV.setGridDTMHeightTextureID(textureID); + outV.setGridDTMScreenSpaceCellWidth(gridCellWidth * globals.screenToWorldRatio); + outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); + outV.setGridDTMScreenSpaceTopLeft(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, topLeft)); + outV.setGridDTMScreenSpaceGridExtents(_static_cast(worldSpaceExtents) * globals.screenToWorldRatio); + + static const float SquareRootOfTwo = 1.4142135f; + const pfloat64_t dilationFactor = SquareRootOfTwo * thicknessOfTheThickestLine; + pfloat64_t2 dilationVector = pfloat64_t2(dilationFactor, dilationFactor); + + const pfloat64_t dilationFactorTimesTwo = dilationFactor * 2.0f; + const pfloat64_t2 dilatedGridExtents = worldSpaceExtents + pfloat64_t2(dilationFactorTimesTwo, dilationFactorTimesTwo); + const float2 uvScale = _static_cast(worldSpaceExtents) / _static_cast(dilatedGridExtents); + float2 uvOffset = float2(dilationFactor, dilationFactor) / _static_cast(dilatedGridExtents); + uvOffset /= uvScale; + + if (corner.x == 0.0f && corner.y == 0.0f) + { + dilationVector.x = -dilationVector.x; + uvOffset.x = -uvOffset.x; + uvOffset.y = -uvOffset.y; + } + else if (corner.x == 0.0f && corner.y == 1.0f) + { + dilationVector.x = -dilationVector.x; + dilationVector.y = -dilationVector.y; + uvOffset.x = -uvOffset.x; + } + else if (corner.x == 1.0f && corner.y == 1.0f) + { + dilationVector.y = -dilationVector.y; + } + else if (corner.x == 1.0f && corner.y == 0.0f) + { + uvOffset.y = -uvOffset.y; + } + + const float2 uv = corner + uvOffset; + outV.setImageUV(uv); + /*printf("uv = { %f, %f } scale = { %f, %f }", _static_cast(uv.x), _static_cast(uv.y), _static_cast(uvScale.x), 
_static_cast(uvScale.y));*/ + + pfloat64_t2 topLeftToGridCenterVector = worldSpaceExtents * 0.5; + topLeftToGridCenterVector.y = -topLeftToGridCenterVector.y; + pfloat64_t2 gridCenter = topLeft + topLeftToGridCenterVector; + + pfloat64_t2 dilatedVtxPos = vtxPos + dilationVector; + + float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, dilatedVtxPos)); + outV.position = float4(ndcVtxPos, 0.0f, 1.0f); -// Make the cage fullscreen for testing: + /*outV.setImageUV(corner); + float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); + outV.position = float4(ndcVtxPos, 0.0f, 1.0f);*/ + } + else if (objType == ObjectType::STREAMED_IMAGE) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; + const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); + const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); + const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + float2 uv = corner; // non-dilated + + float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + + outV.position = float4(ndcCorner, 0.f, 1.f); + outV.setImageUV(uv); + outV.setImageTextureId(textureID); + } + + // Make the cage fullscreen for testing: 
#if 0 - // disabled for object of POLYLINE_CONNECTOR type, since miters would cover whole screen - if(objType != ObjectType::POLYLINE_CONNECTOR) - { - if (vertexIdx == 0u) - outV.position = float4(-1, -1, 0, 1); - else if (vertexIdx == 1u) - outV.position = float4(-1, +1, 0, 1); - else if (vertexIdx == 2u) - outV.position = float4(+1, -1, 0, 1); - else if (vertexIdx == 3u) - outV.position = float4(+1, +1, 0, 1); - } + // disabled for object of POLYLINE_CONNECTOR type, since miters would cover whole screen + if(objType != ObjectType::POLYLINE_CONNECTOR) + { + if (vertexIdx == 0u) + outV.position = float4(-1, -1, 0, 1); + else if (vertexIdx == 1u) + outV.position = float4(-1, +1, 0, 1); + else if (vertexIdx == 2u) + outV.position = float4(+1, -1, 0, 1); + else if (vertexIdx == 3u) + outV.position = float4(+1, +1, 0, 1); + } #endif - + } outV.clip = float4(outV.position.x - clipProjectionData.minClipNDC.x, outV.position.y - clipProjectionData.minClipNDC.y, clipProjectionData.maxClipNDC.x - outV.position.x, clipProjectionData.maxClipNDC.y - outV.position.y); return outV; } diff --git a/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl b/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl new file mode 100644 index 000000000..96647c0e7 --- /dev/null +++ b/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl @@ -0,0 +1,6 @@ +#ifndef _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ +#define _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ + +#include +using DeviceConfigCaps = nbl::hlsl::jit::device_capabilities; +#endif // _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ diff --git a/media b/media index 68dbe85b9..4d9fcebb1 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 68dbe85b9849c9b094760428a3639f5c8917d85e +Subproject commit 4d9fcebb12f8c52f61882054b0da9bd60b295ced diff --git a/old_to_refactor/20_Megatexture/main.cpp b/old_to_refactor/20_Megatexture/main.cpp index 35d0692af..5c309ff24 100644 --- a/old_to_refactor/20_Megatexture/main.cpp +++ b/old_to_refactor/20_Megatexture/main.cpp @@ -684,7 
+684,7 @@ APP_CONSTRUCTOR(MegaTextureApp) video::IGPUBuffer::SCreationParams bufferCreationParams; bufferCreationParams.usage = asset::IBuffer::EUF_STORAGE_BUFFER_BIT; bufferCreationParams.size = sizeof(video::IGPUVirtualTexture::SPrecomputedData); - core::smart_refctd_ptr utilities = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice)); + core::smart_refctd_ptr utilities = video::IUtilities::create(core::smart_refctd_ptr(logicalDevice)); core::smart_refctd_ptr buffer = utilities->createFilledDeviceLocalBufferOnDedMem(queues[CommonAPI::InitOutput::EQT_TRANSFER_UP], std::move(bufferCreationParams), &gpuvt->getPrecomputedData()); {