From f098335b37df0262e704b06b22dd9f37ec53f2e9 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 26 Feb 2025 11:09:16 +0100 Subject: [PATCH 001/129] DTM setup --- 62_CAD/CTriangleMesh.cpp | 1 + 62_CAD/CTriangleMesh.h | 43 ++++++++++++ 62_CAD/DrawResourcesFiller.cpp | 20 ++++++ 62_CAD/DrawResourcesFiller.h | 3 + 62_CAD/main.cpp | 69 ++++++++++++++++--- 62_CAD/shaders/globals.hlsl | 12 +++- 62_CAD/shaders/main_pipeline/common.hlsl | 5 +- .../shaders/main_pipeline/vertex_shader.hlsl | 25 +++++++ 8 files changed, 165 insertions(+), 13 deletions(-) create mode 100644 62_CAD/CTriangleMesh.cpp create mode 100644 62_CAD/CTriangleMesh.h diff --git a/62_CAD/CTriangleMesh.cpp b/62_CAD/CTriangleMesh.cpp new file mode 100644 index 000000000..5564c0a51 --- /dev/null +++ b/62_CAD/CTriangleMesh.cpp @@ -0,0 +1 @@ +#include "CTriangleMesh.h" \ No newline at end of file diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h new file mode 100644 index 000000000..3f39fb750 --- /dev/null +++ b/62_CAD/CTriangleMesh.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include "shaders/globals.hlsl" + +using namespace nbl; + +class CTriangleMesh final +{ +public: + inline void setVertices(core::vector&& vertices) + { + m_vertices = std::move(vertices); + } + inline void setIndices(core::vector&& indices) + { + m_indices = std::move(indices); + } + + inline const core::vector& getVertices() const + { + return m_vertices; + } + inline const core::vector& getIndices() const + { + return m_indices; + } + + inline size_t getVtxBuffByteSize() const + { + return sizeof(decltype(m_vertices)::value_type); + } + inline size_t getIdxBuffByteSize() const + { + return sizeof(decltype(m_indices)::value_type); + } + + +private: + core::vector m_vertices; + core::vector m_indices; +}; \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 7cf96d693..32f72c07b 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ 
-218,6 +218,26 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p } } +void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, core::unordered_map heightColorMap, SIntendedSubmitInfo& intendedNextSubmit) +{ + ICPUBuffer::SCreationParams geometryBuffParams; + + // concatenate the index and vertex buffer into the geometry buffer + const size_t indexBuffSize = mesh.getIdxBuffByteSize(); + const size_t vtxBuffSize = mesh.getVtxBuffByteSize(); + const size_t geometryBufferSizeDataSize = indexBuffSize + vtxBuffSize; + + core::vector geometryBufferData(geometryBufferSizeDataSize); + std::memcpy(geometryBufferData.data(), mesh.getIndices().data(), indexBuffSize); + std::memcpy(geometryBufferData.data() + indexBuffSize, mesh.getVertices().data(), vtxBuffSize); + + SBufferRange geometryBuffRange; + geometryBuffRange.offset = 0; + geometryBuffRange.size = geometryBufferSizeDataSize; + geometryBuffRange.buffer = gpuDrawBuffers.drawObjectsBuffer; + m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, geometryBuffRange, geometryBufferData.data()); +} + // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor void DrawResourcesFiller::drawHatch( const Hatch& hatch, diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index e20514651..f9ab033e9 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -1,5 +1,6 @@ #pragma once #include "Polyline.h" +#include "CTriangleMesh.h" #include "Hatch.h" #include "IndexAllocator.h" #include @@ -76,6 +77,8 @@ struct DrawResourcesFiller void drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + void drawTriangleMesh(const CTriangleMesh& mesh, core::unordered_map heightColorMap, SIntendedSubmitInfo& intendedNextSubmit); + // ! 
Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( const Hatch& hatch, diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 637c88eda..a893d9b40 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -57,6 +57,7 @@ enum class ExampleMode CASE_6, // Custom Clip Projections CASE_7, // Images CASE_8, // MSDF and Text + CASE_9, // DTM CASE_COUNT }; @@ -73,7 +74,7 @@ constexpr std::array cameraExtents = 600.0, // CASE_8 }; -constexpr ExampleMode mode = ExampleMode::CASE_4; +constexpr ExampleMode mode = ExampleMode::CASE_9; class Camera2D { @@ -865,7 +866,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu m_device->updateDescriptorSets(DescriptorUpdatesCount, descriptorUpdates, 0u, nullptr); } - pipelineLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(descriptorSetLayout0), core::smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); + const asset::SPushConstantRange range = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(PushConstants) + }; + + pipelineLayout = m_device->createPipelineLayout({ &range,1 }, core::smart_refctd_ptr(descriptorSetLayout0), core::smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); } smart_refctd_ptr mainPipelineFragmentShaders = {}; @@ -1387,18 +1394,30 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu const uint32_t currentIndexCount = drawResourcesFiller.getDrawObjectCount() * 6u; IGPUDescriptorSet* descriptorSets[] = { descriptorSet0.get(), descriptorSet1.get() }; cb->bindDescriptorSets(asset::EPBP_GRAPHICS, pipelineLayout.get(), 0u, 2u, descriptorSets); + if (mode == ExampleMode::CASE_9) + { - // TODO[Przemek]: based on our call bind index buffer you uploaded to part of the `drawResourcesFiller.gpuDrawBuffers.geometryBuffer` - // Vertices will be pulled based on baseBDAPointer of where you uploaded the vertex + the VertexID in the vertex shader. 
- cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer.get() }, asset::EIT_32BIT); + // TODO[Przemek]: based on our call bind index buffer you uploaded to part of the `drawResourcesFiller.gpuDrawBuffers.geometryBuffer` + // Vertices will be pulled based on baseBDAPointer of where you uploaded the vertex + the VertexID in the vertex shader. + cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.geometryBuffer.get() }, asset::EIT_32BIT); - // TODO[Przemek]: binding the same pipelie, no need to change. - cb->bindGraphicsPipeline(graphicsPipeline.get()); - - // TODO[Przemek]: contour settings, height shading settings, base bda pointers will need to be pushed via pushConstants before the draw currently as it's the easiest thing to do. + // TODO[Przemek]: binding the same pipelie, no need to change. + cb->bindGraphicsPipeline(graphicsPipeline.get()); + + // TODO[Przemek]: contour settings, height shading settings, base bda pointers will need to be pushed via pushConstants before the draw currently as it's the easiest thing to do. 
+ cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &m_pushConstants); - // TODO[Przemek]: draw parameters needs to reflect the mesh involved - cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); + // TODO[Przemek]: draw parameters needs to reflect the mesh involved + cb->drawIndexed(m_triangleMeshIndexCount, 1u, 0u, 0u, 0u); + } + else + { + cb->bindDescriptorSets(asset::EPBP_GRAPHICS, pipelineLayout.get(), 0u, 2u, descriptorSets); + cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer.get() }, asset::EIT_32BIT); + cb->bindGraphicsPipeline(graphicsPipeline.get()); + cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); + } + if (fragmentShaderInterlockEnabled) { cb->bindGraphicsPipeline(resolveAlphaGraphicsPipeline.get()); @@ -3231,6 +3250,31 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } + else if (mode == ExampleMode::CASE_9) + { + core::vector vertices = { + { float32_t2(0.0f, 10.0f), 0.0f }, + { float32_t2(-10.0f, -10.0f), 50.0f }, + { float32_t2(10.0f, -10.0f), 100.0f } + }; + + core::vector indices = { + 0, 1, 2 + }; + + core::unordered_map heightColorMap; + heightColorMap.insert({ 0.0f, {0.0f, 1.0f, 0.0f} }); + heightColorMap.insert({ 100.0f, {0.0f, 1.0f, 0.0f} }); + + m_triangleMeshIndexCount = indices.size(); + m_pushConstants.verticesBaseAddress = sizeof(uint32_t) * m_triangleMeshIndexCount; + + CTriangleMesh mesh; + mesh.setVertices(std::move(vertices)); + mesh.setIndices(std::move(indices)); + + drawResourcesFiller.drawTriangleMesh(mesh, heightColorMap, intendedNextSubmit); + } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); } @@ -3311,6 +3355,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu #endif std::unique_ptr m_geoTextureRenderer; + + PushConstants m_pushConstants; + size_t m_triangleMeshIndexCount; }; NBL_MAIN_FUNC(ComputerAidedDesign) diff --git 
a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 392e796f4..4719f6df8 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -18,7 +18,6 @@ using namespace nbl::hlsl; - // because we can't use jit/device_capabilities.hlsl in c++ code #ifdef __HLSL_VERSION using pfloat64_t = portable_float64_t; @@ -32,6 +31,11 @@ using pfloat64_t3 = nbl::hlsl::vector; using pfloat64_t3x3 = portable_matrix_t3x3; +struct PushConstants +{ + uint64_t verticesBaseAddress; +}; + // TODO: Compute this in a compute shader from the world counterparts // because this struct includes NDC coordinates, the values will change based camera zoom and move // of course we could have the clip values to be in world units and also the matrix to transform to world instead of ndc but that requires extra computations(matrix multiplications) per vertex @@ -265,6 +269,12 @@ NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits: // TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like LineStyle stipple patterns) and requires upper_bound to figure out the color based on height value. // We'll discuss that later or what it will be looking like and how it's gonna get passed to our shaders. 
+struct TriangleMeshVertex +{ + float32_t2 pos; + float32_t height; +}; + // The color parameter is also used for styling non-curve objects such as text glyphs and hatches with solid color struct LineStyle { diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 17c851a19..ca13db341 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -98,7 +98,10 @@ struct PSInput void setCurrentWorldToScreenRatio(float worldToScreen) { interp_data5.y = worldToScreen; } float getCurrentWorldToScreenRatio() { return interp_data5.y; } - + + void setHeightAtMeshVertex(float height) { interp_data5.x = height; } + float getHeightAtMeshVertex() { return interp_data5.x; } + /* LINE */ float2 getLineStart() { return data2.xy; } float2 getLineEnd() { return data2.zw; } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index bff4182f6..3f9e55605 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -7,6 +7,8 @@ #include #include +[[vk::push_constant]] PushConstants pc; + // TODO[Lucas]: Move these functions to builtin hlsl functions (Even the shadertoy obb and aabb ones) float cross2D(float2 a, float2 b) { @@ -92,6 +94,28 @@ PSInput main(uint vertexID : SV_VertexID) // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. 
+ TriangleMeshVertex vtx = vk::RawBufferLoad(pc.verticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); + + PSInput outV; + + pfloat64_t2 vtxPos; + vtxPos.x = _static_cast(vtx.pos.x); + vtxPos.y = _static_cast(vtx.pos.y); + + DrawObject drawObj = drawObjects[0]; + MainObject mainObj = mainObjects[drawObj.mainObjIndex]; + ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); + + float2 transformedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos); + + outV.position.xy = transformedPos; + outV.position.zw = float2(0.0, 1.0); + outV.setHeightAtMeshVertex(vtx.height); + + return outV; + +#if 0 + const uint vertexIdx = vertexID & 0x3u; const uint objectID = vertexID >> 2; @@ -589,4 +613,5 @@ PSInput main(uint vertexID : SV_VertexID) outV.clip = float4(outV.position.x - clipProjectionData.minClipNDC.x, outV.position.y - clipProjectionData.minClipNDC.y, clipProjectionData.maxClipNDC.x - outV.position.x, clipProjectionData.maxClipNDC.y - outV.position.y); return outV; +#endif } From 8058cff295589b358eba291ed22804601cc6c7bf Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 27 Feb 2025 12:18:59 +0100 Subject: [PATCH 002/129] Simple triangle draw --- 62_CAD/CTriangleMesh.h | 26 ++++++--- 62_CAD/DrawResourcesFiller.cpp | 54 +++++++++++++------ 62_CAD/DrawResourcesFiller.h | 2 +- 62_CAD/main.cpp | 29 +++++----- 62_CAD/shaders/globals.hlsl | 6 ++- .../main_pipeline/fragment_shader.hlsl | 2 + .../shaders/main_pipeline/vertex_shader.hlsl | 22 +++++--- 7 files changed, 95 insertions(+), 46 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 3f39fb750..6b5612a5c 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -9,7 +9,17 @@ using namespace nbl; class CTriangleMesh final { public: - inline void setVertices(core::vector&& vertices) + using index_t = uint32_t; + using vertex_t = TriangleMeshVertex; + + struct DrawData + { + PushConstants pushConstants; + uint64_t 
indexBufferOffset; + uint64_t indexCount; + }; + + inline void setVertices(core::vector&& vertices) { m_vertices = std::move(vertices); } @@ -18,7 +28,7 @@ class CTriangleMesh final m_indices = std::move(indices); } - inline const core::vector& getVertices() const + inline const core::vector& getVertices() const { return m_vertices; } @@ -29,15 +39,19 @@ class CTriangleMesh final inline size_t getVtxBuffByteSize() const { - return sizeof(decltype(m_vertices)::value_type); + return sizeof(vertex_t) * m_vertices.size(); } inline size_t getIdxBuffByteSize() const { - return sizeof(decltype(m_indices)::value_type); + return sizeof(index_t) * m_indices.size(); + } + inline size_t getIdxCnt() const + { + return m_indices.size(); } private: - core::vector m_vertices; - core::vector m_indices; + core::vector m_vertices; + core::vector m_indices; }; \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 32f72c07b..291e0ad88 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -84,7 +84,7 @@ void DrawResourcesFiller::allocateGeometryBuffer(ILogicalDevice* logicalDevice, IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; - geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; gpuDrawBuffers.geometryBuffer = logicalDevice->createBuffer(std::move(geometryCreationParams)); gpuDrawBuffers.geometryBuffer->setObjectDebugName("geometryBuffer"); @@ -218,24 +218,48 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p } } -void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, core::unordered_map heightColorMap, 
SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, SIntendedSubmitInfo& intendedNextSubmit) { ICPUBuffer::SCreationParams geometryBuffParams; // concatenate the index and vertex buffer into the geometry buffer - const size_t indexBuffSize = mesh.getIdxBuffByteSize(); - const size_t vtxBuffSize = mesh.getVtxBuffByteSize(); - const size_t geometryBufferSizeDataSize = indexBuffSize + vtxBuffSize; - - core::vector geometryBufferData(geometryBufferSizeDataSize); - std::memcpy(geometryBufferData.data(), mesh.getIndices().data(), indexBuffSize); - std::memcpy(geometryBufferData.data() + indexBuffSize, mesh.getVertices().data(), vtxBuffSize); - - SBufferRange geometryBuffRange; - geometryBuffRange.offset = 0; - geometryBuffRange.size = geometryBufferSizeDataSize; - geometryBuffRange.buffer = gpuDrawBuffers.drawObjectsBuffer; - m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, geometryBuffRange, geometryBufferData.data()); + const size_t indexBuffByteSize = mesh.getIdxBuffByteSize(); + const size_t vtxBuffByteSize = mesh.getVtxBuffByteSize(); + const size_t geometryBufferDataToAddByteSize = indexBuffByteSize + vtxBuffByteSize; + + // copy into gemoetry cpu buffer insteaed + + // TODO: rename, its not just points + const uint32_t maxGeometryBufferPoints = static_cast(maxGeometryBufferSize - currentGeometryBufferSize); + + // TODO: assert of geometry buffer size, do i need to check if size of objects to be added <= maxGeometryBufferPoints? + // TODO: auto submit instead of assert + assert(geometryBufferDataToAddByteSize <= maxGeometryBufferPoints); + + // TODO: vertices need to be aligned to 8? 
+ uint64_t vtxBufferAddress; + { + void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; + void* dst1 = dst; + + drawData.indexBufferOffset = currentGeometryBufferSize; + memcpy(dst, mesh.getIndices().data(), indexBuffByteSize); + currentGeometryBufferSize += indexBuffByteSize; + + dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; + drawData.pushConstants.triangleMeshVerticesBaseAddress = geometryBufferAddress + currentGeometryBufferSize; + memcpy(dst, mesh.getVertices().data(), vtxBuffByteSize); + currentGeometryBufferSize += vtxBuffByteSize; + } + + drawData.indexCount = mesh.getIdxCnt(); + + // call addMainObject_SubmitIfNeeded, use its index in push constants + + drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(0, intendedNextSubmit); + + // TODO: use this function later for auto submit + //submitCurrentDrawObjectsAndReset(intendedNextSubmit, 0); } // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index f9ab033e9..c3b31d32e 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -77,7 +77,7 @@ struct DrawResourcesFiller void drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); - void drawTriangleMesh(const CTriangleMesh& mesh, core::unordered_map heightColorMap, SIntendedSubmitInfo& intendedNextSubmit); + void drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& pushConstants, SIntendedSubmitInfo& intendedNextSubmit); // ! 
Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index a893d9b40..a14d9de55 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1399,20 +1399,20 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // TODO[Przemek]: based on our call bind index buffer you uploaded to part of the `drawResourcesFiller.gpuDrawBuffers.geometryBuffer` // Vertices will be pulled based on baseBDAPointer of where you uploaded the vertex + the VertexID in the vertex shader. - cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.geometryBuffer.get() }, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = m_triangleMeshDrawData.indexBufferOffset, .buffer = drawResourcesFiller.gpuDrawBuffers.geometryBuffer.get() }, asset::EIT_32BIT); // TODO[Przemek]: binding the same pipelie, no need to change. cb->bindGraphicsPipeline(graphicsPipeline.get()); // TODO[Przemek]: contour settings, height shading settings, base bda pointers will need to be pushed via pushConstants before the draw currently as it's the easiest thing to do. 
- cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &m_pushConstants); + + cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(PushConstants), &m_triangleMeshDrawData.pushConstants); // TODO[Przemek]: draw parameters needs to reflect the mesh involved - cb->drawIndexed(m_triangleMeshIndexCount, 1u, 0u, 0u, 0u); + cb->drawIndexed(m_triangleMeshDrawData.indexCount, 1u, 0u, 0u, 0u); } else { - cb->bindDescriptorSets(asset::EPBP_GRAPHICS, pipelineLayout.get(), 0u, 2u, descriptorSets); cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer.get() }, asset::EIT_32BIT); cb->bindGraphicsPipeline(graphicsPipeline.get()); cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); @@ -3253,27 +3253,25 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu else if (mode == ExampleMode::CASE_9) { core::vector vertices = { - { float32_t2(0.0f, 10.0f), 0.0f }, - { float32_t2(-10.0f, -10.0f), 50.0f }, - { float32_t2(10.0f, -10.0f), 100.0f } + { float32_t2(0.0f, 0.0f), 0.0f }, + { float32_t2(0.0f, 100.0f), 50.0f }, + { float32_t2(200.0f, 50.0f), 100.0f } }; core::vector indices = { 0, 1, 2 }; - core::unordered_map heightColorMap; - heightColorMap.insert({ 0.0f, {0.0f, 1.0f, 0.0f} }); - heightColorMap.insert({ 100.0f, {0.0f, 1.0f, 0.0f} }); - - m_triangleMeshIndexCount = indices.size(); - m_pushConstants.verticesBaseAddress = sizeof(uint32_t) * m_triangleMeshIndexCount; + // TODO: height color map + //core::unordered_map heightColorMap; + //heightColorMap.insert({ 0.0f, {0.0f, 1.0f, 0.0f} }); + //heightColorMap.insert({ 100.0f, {0.0f, 1.0f, 0.0f} }); CTriangleMesh mesh; mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); - drawResourcesFiller.drawTriangleMesh(mesh, heightColorMap, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, m_triangleMeshDrawData, intendedNextSubmit); 
} drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); } @@ -3356,8 +3354,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::unique_ptr m_geoTextureRenderer; - PushConstants m_pushConstants; - size_t m_triangleMeshIndexCount; + CTriangleMesh::DrawData m_triangleMeshDrawData; }; NBL_MAIN_FUNC(ComputerAidedDesign) diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 4719f6df8..1902ba39e 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -33,7 +33,8 @@ using pfloat64_t3x3 = portable_matrix_t3x3; struct PushConstants { - uint64_t verticesBaseAddress; + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; }; // TODO: Compute this in a compute shader from the world counterparts @@ -111,7 +112,8 @@ enum class ObjectType : uint32_t CURVE_BOX = 2u, POLYLINE_CONNECTOR = 3u, FONT_GLYPH = 4u, - IMAGE = 5u + IMAGE = 5u, + TRIANGLE_MESH = 6u }; enum class MajorAxis : uint32_t diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index e850622c3..2f21a6a0f 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -405,6 +405,8 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET { + return float4(1.0f, 0.0f, 0.0f, 1.0f); + float localAlpha = 0.0f; float3 textureColor = float3(0, 0, 0); // color sampled from a texture diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 3f9e55605..a798549d5 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -94,27 +94,37 @@ PSInput main(uint vertexID : SV_VertexID) // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which 
triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. - TriangleMeshVertex vtx = vk::RawBufferLoad(pc.verticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); - +#define DTM +#ifdef DTM PSInput outV; + // Default Initialize PS Input + outV.position.zw = float2(0.0, 1.0); + outV.data1 = uint4(0, 0, 0, 0); + outV.data2 = float4(0, 0, 0, 0); + outV.data3 = float4(0, 0, 0, 0); + outV.data4 = float4(0, 0, 0, 0); + outV.interp_data5 = float2(0, 0); + outV.setObjType(ObjectType::TRIANGLE_MESH); + outV.setMainObjectIdx(pc.triangleMeshMainObjectIndex); + + TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 4u); pfloat64_t2 vtxPos; vtxPos.x = _static_cast(vtx.pos.x); vtxPos.y = _static_cast(vtx.pos.y); - DrawObject drawObj = drawObjects[0]; - MainObject mainObj = mainObjects[drawObj.mainObjIndex]; + MainObject mainObj = mainObjects[pc.triangleMeshMainObjectIndex]; ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); float2 transformedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos); outV.position.xy = transformedPos; - outV.position.zw = float2(0.0, 1.0); + outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); outV.setHeightAtMeshVertex(vtx.height); return outV; -#if 0 +#else const uint vertexIdx = vertexID & 0x3u; const uint objectID = vertexID >> 2; From 2b07a421b22636acfcb432cf4dd1aef3c2f02ae0 Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 8 Mar 2025 14:27:40 +0100 Subject: [PATCH 003/129] Barycentrics --- 62_CAD/shaders/main_pipeline/common.hlsl | 13 +++++++++++++ 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 8 ++++++++ 
62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 1 + 3 files changed, 22 insertions(+) diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index ca13db341..a0a903a4d 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -74,6 +74,11 @@ struct PSInput [[vk::location(3)]] nointerpolation float4 data4 : COLOR4; // Data segments that need interpolation, mostly for hatches [[vk::location(5)]] float2 interp_data5 : COLOR5; +#ifdef FRAGMENT_SHADER_INPUT + [[vk::location(6)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] nointerpolation float3 vertexScreenSpacePos[3] : COLOR6; +#else + [[vk::location(6)]] nointerpolation float3 vertexScreenSpacePos : COLOR6; +#endif // ArcLenCalculator // Set functions used in vshader, get functions used in fshader @@ -211,6 +216,14 @@ struct PSInput void setImageUV(float2 uv) { interp_data5.xy = uv; } void setImageTextureId(uint32_t textureId) { data2.x = asfloat(textureId); } + + /* TRIANGLE MESH */ + +#ifndef FRAGMENT_SHADER_INPUT // vertex shader + void setScreenSpaceVertexPos(float3 pos) { vertexScreenSpacePos = pos; } +#else // fragment shader + float3 getScreenSpaceVertexPos(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; } +#endif }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 2f21a6a0f..ab5885d3d 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -1,3 +1,4 @@ +#define FRAGMENT_SHADER_INPUT #include "common.hlsl" #include #include @@ -7,6 +8,7 @@ #include #include #include +#include template struct DefaultClipper @@ -405,6 +407,12 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET { + 
float3 v0 = input.getScreenSpaceVertexPos(0); + float3 v1 = input.getScreenSpaceVertexPos(1); + float3 v2 = input.getScreenSpaceVertexPos(2); + + printf("v0 = { %f, %f, %f }\nv1 = { %f, %f, %f }\nv2 = { %f, %f, %f }", v0.x, v0.y, v0.z, v1.x, v1.y, v1.z, v2.x, v2.y, v2.z); + return float4(1.0f, 0.0f, 0.0f, 1.0f); float localAlpha = 0.0f; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index a798549d5..f7abd6285 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -121,6 +121,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.position.xy = transformedPos; outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); outV.setHeightAtMeshVertex(vtx.height); + outV.setScreenSpaceVertexPos(float3(transformedPos, 1)); return outV; From 0232ee8d04c1159ec22fe9ca1e406dc462e09970 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 13 Mar 2025 16:52:13 +0100 Subject: [PATCH 004/129] Drawing triangle sdf --- 62_CAD/DrawResourcesFiller.cpp | 2 +- 62_CAD/main.cpp | 8 +++ 62_CAD/shaders/main_pipeline/common.hlsl | 6 +- .../main_pipeline/fragment_shader.hlsl | 69 ++++++++++++++++--- 4 files changed, 71 insertions(+), 14 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 291e0ad88..424569b8f 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -256,7 +256,7 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM // call addMainObject_SubmitIfNeeded, use its index in push constants - drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(0, intendedNextSubmit); + drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(InvalidStyleIdx, intendedNextSubmit); // TODO: use this function later for auto submit //submitCurrentDrawObjectsAndReset(intendedNextSubmit, 0); diff --git 
a/62_CAD/main.cpp b/62_CAD/main.cpp index a14d9de55..3aad8c4d3 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1501,6 +1501,14 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu retval.fragmentShaderPixelInterlock = FragmentShaderPixelInterlock; return retval; } + + virtual video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + video::SPhysicalDeviceLimits retval = base_t::getRequiredDeviceLimits(); + retval.fragmentShaderBarycentric = true; + + return retval; + } virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override { diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index a0a903a4d..4fd45ab5c 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -75,10 +75,10 @@ struct PSInput // Data segments that need interpolation, mostly for hatches [[vk::location(5)]] float2 interp_data5 : COLOR5; #ifdef FRAGMENT_SHADER_INPUT - [[vk::location(6)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] nointerpolation float3 vertexScreenSpacePos[3] : COLOR6; + [[vk::location(6)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] float3 vertexScreenSpacePos[3] : COLOR6; #else - [[vk::location(6)]] nointerpolation float3 vertexScreenSpacePos : COLOR6; -#endif + [[vk::location(6)]] float3 vertexScreenSpacePos : COLOR6; +#endif // ArcLenCalculator // Set functions used in vshader, get functions used in fshader diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index ab5885d3d..845cb36d7 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -407,22 +407,71 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET { - float3 v0 = input.getScreenSpaceVertexPos(0); - float3 v1 = 
input.getScreenSpaceVertexPos(1); - float3 v2 = input.getScreenSpaceVertexPos(2); - - printf("v0 = { %f, %f, %f }\nv1 = { %f, %f, %f }\nv2 = { %f, %f, %f }", v0.x, v0.y, v0.z, v1.x, v1.y, v1.z, v2.x, v2.y, v2.z); - - return float4(1.0f, 0.0f, 0.0f, 1.0f); - float localAlpha = 0.0f; float3 textureColor = float3(0, 0, 0); // color sampled from a texture - // TODO[Przemek]: Disable All the object rendering paths if you want. ObjectType objType = input.getObjType(); const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); const MainObject mainObj = mainObjects[currentMainObjectIdx]; - + + // TRIANGLE RENDERING + { + float3 v0 = input.getScreenSpaceVertexPos(0); + float3 v1 = input.getScreenSpaceVertexPos(1); + float3 v2 = input.getScreenSpaceVertexPos(2); + + float2 start; + float2 end; + const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; + + // TODO: figure out if branching can be reduced + if (baryCoord.x < baryCoord.y && baryCoord.x < baryCoord.z) + { + start = v1; + end = v2; + } + else if (baryCoord.y < baryCoord.x && baryCoord.y < baryCoord.z) + { + start = v0; + end = v2; + } + else if (baryCoord.z < baryCoord.x && baryCoord.z < baryCoord.y) + { + start = v0; + end = v1; + } + + float distance = nbl::hlsl::numeric_limits::max; + const uint32_t styleIdx = mainObj.styleIdx; + const float thickness = 2.0f; + const float phaseShift = 0.0f; + const float stretch = 0.0f; + const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); + + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + + LineStyle style = lineStyles[styleIdx]; + + // TODO: stipples + //if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + //{ + //distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); + //} + //else + //{ + // 
LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + // distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + //} + + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, true); + + localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); + } + + textureColor = float3(1.0f, 1.0f, 1.0f); + return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); + // figure out local alpha with sdf if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) { From 83d8de494f9d7e68c7ac30dde28a63c62d2cd45b Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 14 Mar 2025 16:28:53 +0100 Subject: [PATCH 005/129] Added dtmSettingsBuff --- 62_CAD/CTriangleMesh.h | 9 ++ 62_CAD/DrawResourcesFiller.cpp | 74 +++++++++++++- 62_CAD/DrawResourcesFiller.h | 24 ++++- 62_CAD/SingleLineText.cpp | 2 +- 62_CAD/main.cpp | 98 +++++++++++++++---- 62_CAD/shaders/globals.hlsl | 17 +++- 62_CAD/shaders/main_pipeline/common.hlsl | 9 +- .../main_pipeline/fragment_shader.hlsl | 41 ++++---- .../shaders/main_pipeline/vertex_shader.hlsl | 8 ++ 9 files changed, 225 insertions(+), 57 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 6b5612a5c..d71198005 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -6,6 +6,15 @@ using namespace nbl; +struct DTMSettingsInfo +{ + LineStyleInfo outlineLineStyleInfo; + LineStyleInfo contourLineStyleInfo; + // TODO: heights + + +}; + class CTriangleMesh final { public: diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 424569b8f..995ecfacc 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -116,6 +116,24 @@ void 
DrawResourcesFiller::allocateStylesBuffer(ILogicalDevice* logicalDevice, ui } } +void DrawResourcesFiller::allocateDTMSettingsBuffer(ILogicalDevice* logicalDevice, uint32_t dtmSettingsCount) +{ + maxDtmSettings = dtmSettingsCount; + size_t dtmSettingsBufferSize = dtmSettingsCount * sizeof(DTMSettings); + + IGPUBuffer::SCreationParams dtmSettingsCreationParams = {}; + dtmSettingsCreationParams.size = dtmSettingsBufferSize; + dtmSettingsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + gpuDrawBuffers.dtmSettingsBuffer = logicalDevice->createBuffer(std::move(dtmSettingsCreationParams)); + gpuDrawBuffers.dtmSettingsBuffer->setObjectDebugName("dtmSettingsBuffer"); + + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.dtmSettingsBuffer->getMemoryReqs(); + memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + auto stylesBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.dtmSettingsBuffer.get()); + + cpuDrawBuffers.dtmSettingsBuffer = ICPUBuffer::create({ dtmSettingsBufferSize }); +} + void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) { msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); @@ -172,7 +190,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyleInfo, intendedNextSubmit); - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); + uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); drawPolyline(polyline, mainObjIdx, intendedNextSubmit); } @@ -218,7 +236,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p } } -void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, SIntendedSubmitInfo& intendedNextSubmit) +void 
DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { ICPUBuffer::SCreationParams geometryBuffParams; @@ -256,7 +274,9 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM // call addMainObject_SubmitIfNeeded, use its index in push constants - drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(InvalidStyleIdx, intendedNextSubmit); + uint32_t dtmSettingsIndex = addDTMSettings_SubmitIfNeeded(dtmSettingsInfo, intendedNextSubmit); + + drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(InvalidStyleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); // TODO: use this function later for auto submit //submitCurrentDrawObjectsAndReset(intendedNextSubmit, 0); @@ -304,7 +324,7 @@ void DrawResourcesFiller::drawHatch( lineStyle.screenSpaceLineWidth = nbl::hlsl::bit_cast(textureIdx); const uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); + uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject used in vertex shader. You can think of it as a Cage. 
while (currentObjectInSection < hatch.getHatchBoxCount()) { @@ -379,10 +399,27 @@ uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& l return outLineStyleIdx; } -uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + if (outDTMSettingIdx == InvalidStyleIdx) + { + finalizeAllCopiesToGPU(intendedNextSubmit); + submitDraws(intendedNextSubmit); + resetGeometryCounters(); + resetMainObjectCounters(); + resetLineStyleCounters(); + outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + assert(outDTMSettingIdx != InvalidDTMSettingsIdx); + } + return outDTMSettingIdx; +} + +uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, uint32_t dtmSettingsIdx, SIntendedSubmitInfo& intendedNextSubmit) { MainObject mainObject = {}; mainObject.styleIdx = styleIdx; + mainObject.dtmSettingsIdx = dtmSettingsIdx; mainObject.clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); uint32_t outMainObjectIdx = addMainObject_Internal(mainObject); if (outMainObjectIdx == InvalidMainObjectIdx) @@ -728,6 +765,33 @@ uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineSty return currentLineStylesCount++; } +uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) +{ + DTMSettings dtmSettings; + + // TODO: this needs to be redone.. what if submit happens after that line? 
+ // we need to make sure somehow that function below will not submit, we need both outline and contour styles in GPU memory + dtmSettings.outlineLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.outlineLineStyleInfo, intendedNextSubmit); + dtmSettings.contourLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.contourLineStyleInfo, intendedNextSubmit); + + DTMSettings* settingsArray = reinterpret_cast(cpuDrawBuffers.dtmSettingsBuffer->getPointer()); + for (uint32_t i = 0u; i < currentDTMSettingsCount; ++i) + { + const DTMSettings& itr = settingsArray[i]; + if (itr == dtmSettings) + return i; + } + + if (currentDTMSettingsCount >= maxDtmSettings) + return InvalidDTMSettingsIdx; + + void* dst = settingsArray + currentDTMSettingsCount; + memcpy(dst, &dtmSettings, sizeof(DTMSettings)); + return currentDTMSettingsCount++; + + return InvalidDTMSettingsIdx; +} + uint64_t DrawResourcesFiller::acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit) { if (clipProjectionAddresses.empty()) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index c3b31d32e..ef7eab307 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -27,6 +27,7 @@ struct DrawBuffers smart_refctd_ptr drawObjectsBuffer; smart_refctd_ptr geometryBuffer; smart_refctd_ptr lineStylesBuffer; + smart_refctd_ptr dtmSettingsBuffer; }; // ! 
DrawResourcesFiller @@ -57,6 +58,8 @@ struct DrawResourcesFiller void allocateGeometryBuffer(ILogicalDevice* logicalDevice, size_t size); void allocateStylesBuffer(ILogicalDevice* logicalDevice, uint32_t lineStylesCount); + + void allocateDTMSettingsBuffer(ILogicalDevice* logicalDevice, uint32_t dtmSettingsCount); void allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); @@ -77,7 +80,7 @@ struct DrawResourcesFiller void drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); - void drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& pushConstants, SIntendedSubmitInfo& intendedNextSubmit); + void drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); // ! Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( @@ -145,7 +148,7 @@ struct DrawResourcesFiller return false; }; - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(InvalidStyleIdx, intendedNextSubmit); + uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(InvalidStyleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); ImageObjectInfo info = {}; info.topLeft = topLeftPos; @@ -194,12 +197,15 @@ struct DrawResourcesFiller resetGeometryCounters(); resetMainObjectCounters(); resetLineStyleCounters(); + resetDTMSettingsCounters(); } DrawBuffers cpuDrawBuffers; DrawBuffers gpuDrawBuffers; uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); + + uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); // TODO[Przemek]: Read after reading the fragment shader comments and having a basic understanding of the relationship between "mainObject" and our programmable blending resolve: // Use `addMainObject_SubmitIfNeeded` to push your single mainObject you'll be 
using for the enitre triangle mesh (this will ensure overlaps between triangles of the same mesh is resolved correctly) @@ -209,7 +215,7 @@ struct DrawResourcesFiller // Never call this function multiple times in a row before indexing it in a drawable, because future auto-submits may invalidate mainObjects, so do them one by one, for example: // Valid: addMainObject1 --> addXXX(mainObj1) ---> addMainObject2 ---> addXXX(mainObj2) .... // Invalid: addMainObject1 ---> addMainObject2 ---> addXXX(mainObj1) ---> addXXX(mainObj2) .... - uint32_t addMainObject_SubmitIfNeeded(uint32_t styleIdx, SIntendedSubmitInfo& intendedNextSubmit); + uint32_t addMainObject_SubmitIfNeeded(uint32_t styleIdx, uint32_t dtmSettingsIdx, SIntendedSubmitInfo& intendedNextSubmit); // we need to store the clip projection stack to make sure the front is always available in memory void pushClipProjectionData(const ClipProjectionData& clipProjectionData); @@ -257,6 +263,8 @@ struct DrawResourcesFiller uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); + uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + // Gets the current clip projection data (the top of stack) gpu addreess inside the geometryBuffer // If it's been invalidated then it will request to upload again with a possible auto-submit on low geometry buffer memory. 
uint64_t acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit); @@ -314,6 +322,12 @@ struct DrawResourcesFiller inMemLineStylesCount = 0u; } + void resetDTMSettingsCounters() + { + currentDTMSettingsCount = 0u; + inMemDTMSettingsCount = 0u; + } + MainObject* getMainObject(uint32_t idx) { MainObject* mainObjsArray = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()); @@ -432,6 +446,10 @@ struct DrawResourcesFiller uint32_t currentLineStylesCount = 0u; uint32_t maxLineStyles = 0u; + uint32_t inMemDTMSettingsCount = 0u; + uint32_t currentDTMSettingsCount = 0u; + uint32_t maxDtmSettings = 0u; + uint64_t geometryBufferAddress = 0u; // Actual BDA offset 0 of the gpu buffer std::deque clipProjections; // stack of clip projectios stored so we can resubmit them if geometry buffer got reset. diff --git a/62_CAD/SingleLineText.cpp b/62_CAD/SingleLineText.cpp index 4b41cb628..f68f78db3 100644 --- a/62_CAD/SingleLineText.cpp +++ b/62_CAD/SingleLineText.cpp @@ -64,7 +64,7 @@ void SingleLineText::Draw( lineStyle.screenSpaceLineWidth = tan(tiltTiltAngle); lineStyle.worldSpaceLineWidth = boldInPixels; const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - auto glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); + auto glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); for (const auto& glyphBox : m_glyphBoxes) { diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 3aad8c4d3..cce87e3b2 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -294,6 +294,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.allocateMainObjectsBuffer(m_device.get(), maxObjects); drawResourcesFiller.allocateDrawObjectsBuffer(m_device.get(), maxObjects * 5u); drawResourcesFiller.allocateStylesBuffer(m_device.get(), 512u); + 
drawResourcesFiller.allocateDTMSettingsBuffer(m_device.get(), 512u); // * 3 because I just assume there is on average 3x beziers per actual object (cause we approximate other curves/arcs with beziers now) // + 128 ClipProjData @@ -711,20 +712,27 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }, { .binding = 4u, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + }, + { + .binding = 5u, .type = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 5u, + .binding = 6u, .type = asset::IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 6u, + .binding = 7u, .type = asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = bindlessTextureFlags, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, @@ -768,7 +776,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { descriptorSet0 = descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout0)); descriptorSet1 = descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout1)); - constexpr uint32_t DescriptorCountSet0 = 6u; + constexpr uint32_t DescriptorCountSet0 = 7u; video::IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[DescriptorCountSet0] = {}; // Descriptors For Set 0: @@ -788,11 +796,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu descriptorInfosSet0[3u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer->getCreationParams().size; 
descriptorInfosSet0[3u].desc = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer; - descriptorInfosSet0[4u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfosSet0[4u].info.combinedImageSampler.sampler = msdfTextureSampler; - descriptorInfosSet0[4u].desc = drawResourcesFiller.getMSDFsTextureArray(); + descriptorInfosSet0[4u].info.buffer.offset = 0u; + descriptorInfosSet0[4u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.dtmSettingsBuffer->getCreationParams().size; + descriptorInfosSet0[4u].desc = drawResourcesFiller.gpuDrawBuffers.dtmSettingsBuffer; + + descriptorInfosSet0[5u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfosSet0[5u].info.combinedImageSampler.sampler = msdfTextureSampler; + descriptorInfosSet0[5u].desc = drawResourcesFiller.getMSDFsTextureArray(); - descriptorInfosSet0[5u].desc = msdfTextureSampler; // TODO[Erfan]: different sampler and make immutable? + descriptorInfosSet0[6u].desc = msdfTextureSampler; // TODO[Erfan]: different sampler and make immutable? // This is bindless to we write to it later. 
// descriptorInfosSet0[6u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; @@ -813,55 +825,67 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[DescriptorUpdatesCount] = {}; // Set 0 Updates: + // globals descriptorUpdates[0u].dstSet = descriptorSet0.get(); descriptorUpdates[0u].binding = 0u; descriptorUpdates[0u].arrayElement = 0u; descriptorUpdates[0u].count = 1u; descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; + // drawObjectsBuffer descriptorUpdates[1u].dstSet = descriptorSet0.get(); descriptorUpdates[1u].binding = 1u; descriptorUpdates[1u].arrayElement = 0u; descriptorUpdates[1u].count = 1u; descriptorUpdates[1u].info = &descriptorInfosSet0[1u]; + // mainObjectsBuffer descriptorUpdates[2u].dstSet = descriptorSet0.get(); descriptorUpdates[2u].binding = 2u; descriptorUpdates[2u].arrayElement = 0u; descriptorUpdates[2u].count = 1u; descriptorUpdates[2u].info = &descriptorInfosSet0[2u]; + // lineStylesBuffer descriptorUpdates[3u].dstSet = descriptorSet0.get(); descriptorUpdates[3u].binding = 3u; descriptorUpdates[3u].arrayElement = 0u; descriptorUpdates[3u].count = 1u; descriptorUpdates[3u].info = &descriptorInfosSet0[3u]; + // dtmSettingsBuffer descriptorUpdates[4u].dstSet = descriptorSet0.get(); descriptorUpdates[4u].binding = 4u; descriptorUpdates[4u].arrayElement = 0u; descriptorUpdates[4u].count = 1u; descriptorUpdates[4u].info = &descriptorInfosSet0[4u]; - + + // mdfs textures descriptorUpdates[5u].dstSet = descriptorSet0.get(); descriptorUpdates[5u].binding = 5u; descriptorUpdates[5u].arrayElement = 0u; descriptorUpdates[5u].count = 1u; descriptorUpdates[5u].info = &descriptorInfosSet0[5u]; - - // Set 1 Updates: - descriptorUpdates[6u].dstSet = descriptorSet1.get(); - descriptorUpdates[6u].binding = 0u; + + // mdfs samplers + descriptorUpdates[6u].dstSet = descriptorSet0.get(); + descriptorUpdates[6u].binding = 6u; 
descriptorUpdates[6u].arrayElement = 0u; descriptorUpdates[6u].count = 1u; - descriptorUpdates[6u].info = &descriptorInfosSet1[0u]; + descriptorUpdates[6u].info = &descriptorInfosSet0[6u]; + // Set 1 Updates: descriptorUpdates[7u].dstSet = descriptorSet1.get(); - descriptorUpdates[7u].binding = 1u; + descriptorUpdates[7u].binding = 0u; descriptorUpdates[7u].arrayElement = 0u; descriptorUpdates[7u].count = 1u; - descriptorUpdates[7u].info = &descriptorInfosSet1[1u]; + descriptorUpdates[7u].info = &descriptorInfosSet1[0u]; + descriptorUpdates[8u].dstSet = descriptorSet1.get(); + descriptorUpdates[8u].binding = 1u; + descriptorUpdates[8u].arrayElement = 0u; + descriptorUpdates[8u].count = 1u; + descriptorUpdates[8u].info = &descriptorInfosSet1[1u]; m_device->updateDescriptorSets(DescriptorUpdatesCount, descriptorUpdates, 0u, nullptr); } @@ -3123,7 +3147,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu lineStyle.color = float32_t4(1.0, 1.0, 1.0, 1.0); const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); + glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); } float64_t2 currentBaselineStart = float64_t2(0.0, 0.0); @@ -3261,13 +3285,33 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu else if (mode == ExampleMode::CASE_9) { core::vector vertices = { - { float32_t2(0.0f, 0.0f), 0.0f }, - { float32_t2(0.0f, 100.0f), 50.0f }, - { float32_t2(200.0f, 50.0f), 100.0f } + { float32_t2(-200.0f, -200.0f), 0.0f }, + { float32_t2(-50.0f, -200.0f), 0.0f }, + { float32_t2(100.0f, -200.0f), 0.0f }, + { float32_t2(-125.0f, -70.1f), 0.0f }, + { float32_t2(25.0f, -70.1f), 0.0f }, + { float32_t2(175.0f, -70.1f), 0.0f }, + { float32_t2(-200.0f, 59.8f), 0.0f }, + { float32_t2(-50.0f, 59.8f), 0.0f }, + { 
float32_t2(100.0f, 59.8f), 0.0f }, + { float32_t2(-125.0f, 189.7f), 0.0f }, + { float32_t2(25.0f, 189.7f), 0.0f }, + { float32_t2(175.0f, 189.7f), 0.0f } }; core::vector indices = { - 0, 1, 2 + 0, 1, 3, + 1, 3, 4, + 1, 2, 4, + 2, 4, 5, + 3, 4, 6, + 4, 6, 7, + 4, 5, 7, + 5, 7, 8, + 6, 7, 9, + 7, 9, 10, + 7, 8, 10, + 8, 10, 11 }; // TODO: height color map @@ -3279,7 +3323,19 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); - drawResourcesFiller.drawTriangleMesh(mesh, m_triangleMeshDrawData, intendedNextSubmit); + DTMSettingsInfo dtmSettingsInfo; + + LineStyleInfo outlineStyle = {}; + dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 5.0f; + dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.5f, 0.5f, 1.0f); + + LineStyleInfo contourStyle = {}; + dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmSettingsInfo.contourLineStyleInfo.worldSpaceLineWidth = 5.0f; + dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(1.0f, 0.5f, 0.31f, 1.0f); + + drawResourcesFiller.drawTriangleMesh(mesh, m_triangleMeshDrawData, dtmSettingsInfo, intendedNextSubmit); } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); } diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 1902ba39e..e7029a79e 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -126,7 +126,7 @@ enum class MajorAxis : uint32_t struct MainObject { uint32_t styleIdx; - uint32_t pad; // do I even need this on the gpu side? it's stored in structured buffer not bda + uint32_t dtmSettingsIdx; // do I even need this on the gpu side? 
it's stored in structured buffer not bda uint64_t clipProjectionAddress; }; @@ -328,6 +328,13 @@ struct LineStyle } }; +struct DTMSettings +{ + uint32_t outlineLineStyleIdx; // index into line styles + uint32_t contourLineStyleIdx; // index into line styles + // TODO: + // ContourSettings -> min, max, interval +}; #ifndef __HLSL_VERSION inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) { @@ -350,12 +357,20 @@ inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) return isStipplePatternArrayEqual; } + +inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) +{ + return lhs.outlineLineStyleIdx == rhs.outlineLineStyleIdx && + lhs.contourLineStyleIdx == rhs.contourLineStyleIdx; +} + #endif NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture NBL_CONSTEXPR uint32_t AlphaBits = 32u - MainObjectIdxBits; NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; NBL_CONSTEXPR uint64_t InvalidClipProjectionAddress = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 4fd45ab5c..dc47604ad 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -231,12 +231,13 @@ struct PSInput [[vk::binding(1, 0)]] StructuredBuffer drawObjects : register(t0); [[vk::binding(2, 0)]] StructuredBuffer mainObjects : register(t1); [[vk::binding(3, 0)]] StructuredBuffer lineStyles : register(t2); +[[vk::binding(4, 0)]] StructuredBuffer dtmSettingsBuff : register(t3); -[[vk::combinedImageSampler]][[vk::binding(4, 0)]] Texture2DArray msdfTextures : register(t3); 
-[[vk::combinedImageSampler]][[vk::binding(4, 0)]] SamplerState msdfSampler : register(s3); +[[vk::combinedImageSampler]][[vk::binding(5, 0)]] Texture2DArray msdfTextures : register(t4); +[[vk::combinedImageSampler]][[vk::binding(5, 0)]] SamplerState msdfSampler : register(s4); -[[vk::binding(5, 0)]] SamplerState textureSampler : register(s4); -[[vk::binding(6, 0)]] Texture2D textures[128] : register(t4); +[[vk::binding(6, 0)]] SamplerState textureSampler : register(s5); +[[vk::binding(7, 0)]] Texture2D textures[128] : register(t5); // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] globallycoherent RWTexture2D pseudoStencil : register(u0); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 845cb36d7..21d114d90 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -427,25 +427,24 @@ float4 fragMain(PSInput input) : SV_TARGET // TODO: figure out if branching can be reduced if (baryCoord.x < baryCoord.y && baryCoord.x < baryCoord.z) { - start = v1; - end = v2; + start = float2(v1.x, v1.y); + end = float2(v2.x, v2.y); } else if (baryCoord.y < baryCoord.x && baryCoord.y < baryCoord.z) { - start = v0; - end = v2; + start = float2(v1.x, v1.y); + end = float2(v2.x, v2.y); } else if (baryCoord.z < baryCoord.x && baryCoord.z < baryCoord.y) { - start = v0; - end = v1; + start = float2(v0.x, v0.y); + end = float2(v1.x, v1.y); } - float distance = nbl::hlsl::numeric_limits::max; const uint32_t styleIdx = mainObj.styleIdx; - const float thickness = 2.0f; - const float phaseShift = 0.0f; - const float stretch = 0.0f; + const float thickness = input.getLineThickness(); + const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); + const float stretch = 0.0f; // input.getPatternStretch(); const float worldToScreenRatio = 
input.getCurrentWorldToScreenRatio(); nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); @@ -453,18 +452,16 @@ float4 fragMain(PSInput input) : SV_TARGET LineStyle style = lineStyles[styleIdx]; - // TODO: stipples - //if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - //{ - //distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); - //} - //else - //{ - // LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - // distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - //} - - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, true); + float distance = nbl::hlsl::numeric_limits::max; + if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); + } + else + { + LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + } localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index f7abd6285..08418c844 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -123,6 +123,14 @@ PSInput main(uint vertexID : SV_VertexID) outV.setHeightAtMeshVertex(vtx.height); outV.setScreenSpaceVertexPos(float3(transformedPos, 1)); + // TODO: line style of contour line has 
to be set too! + DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; + LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; + LineStyle contourStyle = lineStyles[dtmSettings.contourLineStyleIdx]; + const float screenSpaceLineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float sdfLineThickness = screenSpaceLineWidth * 0.5f; + outV.setLineThickness(sdfLineThickness); + return outV; #else From d4647d588afd7420830d7b45ca6efde0825f1ecc Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 14 Mar 2025 17:58:53 +0100 Subject: [PATCH 006/129] Added `finalizeDTMSettingsCopiesToGPU` function --- 62_CAD/DrawResourcesFiller.cpp | 21 +++++++++++++++++++ 62_CAD/DrawResourcesFiller.h | 2 ++ .../main_pipeline/fragment_shader.hlsl | 13 ++++++------ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 995ecfacc..44837e415 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -379,6 +379,7 @@ bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNe success &= finalizeMainObjectCopiesToGPU(intendedNextSubmit); success &= finalizeGeometryCopiesToGPU(intendedNextSubmit); success &= finalizeLineStyleCopiesToGPU(intendedNextSubmit); + success &= finalizeDTMSettingsCopiesToGPU(intendedNextSubmit); success &= finalizeTextureCopies(intendedNextSubmit); return success; } @@ -533,6 +534,26 @@ bool DrawResourcesFiller::finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& inte return success; } +bool DrawResourcesFiller::finalizeDTMSettingsCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +{ + bool success = true; + // Copy LineStyles + uint32_t remainingLineStyles = currentDTMSettingsCount - inMemDTMSettingsCount; + SBufferRange dtmSettingsRange = { sizeof(DTMSettings) * inMemDTMSettingsCount, sizeof(DTMSettings) * remainingLineStyles, 
gpuDrawBuffers.dtmSettingsBuffer }; + if (dtmSettingsRange.size > 0u) + { + const DTMSettings* srcDTMSettingsData = reinterpret_cast(cpuDrawBuffers.dtmSettingsBuffer->getPointer()) + inMemDTMSettingsCount; + if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, dtmSettingsRange, srcDTMSettingsData)) + inMemDTMSettingsCount = currentDTMSettingsCount; + else + { + // TODO: Log + success = false; + } + } + return success; +} + bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit) { msdfTextureArrayIndicesUsed.clear(); // clear msdf textures used in the frame, because the frame finished and called this function. diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index ef7eab307..98dffa90e 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -247,6 +247,8 @@ struct DrawResourcesFiller bool finalizeGeometryCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); bool finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + + bool finalizeDTMSettingsCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); bool finalizeCustomClipProjectionCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 21d114d90..a4176d1ef 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -441,7 +441,6 @@ float4 fragMain(PSInput input) : SV_TARGET end = float2(v1.x, v1.y); } - const uint32_t styleIdx = mainObj.styleIdx; const float thickness = input.getLineThickness(); const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); const float stretch = 0.0f; // input.getPatternStretch(); @@ -450,17 +449,19 @@ float4 fragMain(PSInput input) : SV_TARGET nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = 
nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyle style = lineStyles[styleIdx]; + DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; + LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; + LineStyle contourStyle = lineStyles[dtmSettings.contourLineStyleIdx]; float distance = nbl::hlsl::numeric_limits::max; - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, outlineStyle.isRoadStyleFlag); } else { - LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, outlineStyle.isRoadStyleFlag, clipper); } localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); From 16951a3e6b9870760437f8531c298355dcd8545b Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 19 Mar 2025 11:42:12 +0100 Subject: [PATCH 007/129] Stippled outline --- 62_CAD/DrawResourcesFiller.cpp | 5 +++-- 62_CAD/main.cpp | 7 +++++-- .../main_pipeline/fragment_shader.hlsl | 20 ++++++++++++++----- .../shaders/main_pipeline/vertex_shader.hlsl | 4 ++++ 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 44837e415..49c81f3ff 100644 --- 
a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -276,7 +276,7 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM uint32_t dtmSettingsIndex = addDTMSettings_SubmitIfNeeded(dtmSettingsInfo, intendedNextSubmit); - drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(InvalidStyleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); + drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(InvalidStyleIdx, dtmSettingsIndex, intendedNextSubmit); // TODO: use this function later for auto submit //submitCurrentDrawObjectsAndReset(intendedNextSubmit, 0); @@ -522,7 +522,8 @@ bool DrawResourcesFiller::finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& inte SBufferRange stylesRange = { sizeof(LineStyle) * inMemLineStylesCount, sizeof(LineStyle) * remainingLineStyles, gpuDrawBuffers.lineStylesBuffer }; if (stylesRange.size > 0u) { - const LineStyle* srcLineStylesData = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer()) + inMemLineStylesCount; + LineStyle* srcLineStylesData = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer()) + inMemLineStylesCount; + if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, stylesRange, srcLineStylesData)) inMemLineStylesCount = currentLineStylesCount; else diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index cce87e3b2..07df1febb 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3300,7 +3300,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }; core::vector indices = { - 0, 1, 3, + 0, 3, 1, 1, 3, 4, 1, 2, 4, 2, 4, 5, @@ -3327,8 +3327,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu LineStyleInfo outlineStyle = {}; dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 5.0f; + dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 2.0f; 
dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.5f, 0.5f, 1.0f); + std::array outlineStipplePattern = { 0.0f, -5.0f, 2.0f, -5.0f }; + dtmSettingsInfo.outlineLineStyleInfo.setStipplePatternData(outlineStipplePattern); LineStyleInfo contourStyle = {}; dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; @@ -3337,6 +3339,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawTriangleMesh(mesh, m_triangleMeshDrawData, dtmSettingsInfo, intendedNextSubmit); } + drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); } diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index a4176d1ef..4449d4687 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -425,6 +425,7 @@ float4 fragMain(PSInput input) : SV_TARGET const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; // TODO: figure out if branching can be reduced + // finding line start and end points by excluding vertex with the lowest barycentric coordinate value if (baryCoord.x < baryCoord.y && baryCoord.x < baryCoord.z) { start = float2(v1.x, v1.y); @@ -432,7 +433,7 @@ float4 fragMain(PSInput input) : SV_TARGET } else if (baryCoord.y < baryCoord.x && baryCoord.y < baryCoord.z) { - start = float2(v1.x, v1.y); + start = float2(v0.x, v0.y); end = float2(v2.x, v2.y); } else if (baryCoord.z < baryCoord.x && baryCoord.z < baryCoord.y) @@ -441,13 +442,20 @@ float4 fragMain(PSInput input) : SV_TARGET end = float2(v1.x, v1.y); } + // long story short, in order for stipple patterns to be consistent: + // - point with lesser x coord should be starting point + // - if x coord of both points are equal then point with lesser y value should be starting point + if (end.x < start.x) + nbl::hlsl::swap(start, end); + else if (end.x == start.x && end.y < start.y) + nbl::hlsl::swap(start, end); + const float thickness = 
input.getLineThickness(); const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float stretch = 0.0f; // input.getPatternStretch(); + const float stretch = 1.0f; const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; @@ -460,14 +468,16 @@ float4 fragMain(PSInput input) : SV_TARGET } else { + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + printf("stretch = %f, worldToScreenRatio = %f", stretch, worldToScreenRatio); LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, outlineStyle.isRoadStyleFlag, clipper); } localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); + textureColor = float3(outlineStyle.color.x, outlineStyle.color.y, outlineStyle.color.z); } - textureColor = float3(1.0f, 1.0f, 1.0f); return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); // figure out local alpha with sdf @@ -485,7 +495,6 @@ float4 fragMain(PSInput input) : SV_TARGET const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); LineStyle style = lineStyles[styleIdx]; @@ -495,6 +504,7 @@ float4 fragMain(PSInput input) : SV_TARGET } else { 
+ nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 08418c844..e68ad1dec 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -122,6 +122,10 @@ PSInput main(uint vertexID : SV_VertexID) outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); outV.setHeightAtMeshVertex(vtx.height); outV.setScreenSpaceVertexPos(float3(transformedPos, 1)); + outV.setCurrentWorldToScreenRatio( + _static_cast((_static_cast(2.0f) / + (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) + ); // TODO: line style of contour line has to be set too! 
DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; From a215f450b8432c27c5ed0a352185e22b34aa2794 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 20 Mar 2025 12:36:03 +0100 Subject: [PATCH 008/129] Contour drawing setup --- 62_CAD/main.cpp | 24 ++++++++--------- 62_CAD/shaders/main_pipeline/common.hlsl | 4 +-- .../main_pipeline/fragment_shader.hlsl | 27 +++++++++++++++++-- .../shaders/main_pipeline/vertex_shader.hlsl | 2 +- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 07df1febb..da3c93acd 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3285,18 +3285,18 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu else if (mode == ExampleMode::CASE_9) { core::vector vertices = { - { float32_t2(-200.0f, -200.0f), 0.0f }, - { float32_t2(-50.0f, -200.0f), 0.0f }, - { float32_t2(100.0f, -200.0f), 0.0f }, - { float32_t2(-125.0f, -70.1f), 0.0f }, - { float32_t2(25.0f, -70.1f), 0.0f }, - { float32_t2(175.0f, -70.1f), 0.0f }, - { float32_t2(-200.0f, 59.8f), 0.0f }, - { float32_t2(-50.0f, 59.8f), 0.0f }, - { float32_t2(100.0f, 59.8f), 0.0f }, - { float32_t2(-125.0f, 189.7f), 0.0f }, - { float32_t2(25.0f, 189.7f), 0.0f }, - { float32_t2(175.0f, 189.7f), 0.0f } + { float32_t2(-200.0f, -200.0f), 10.0f }, + { float32_t2(-50.0f, -200.0f), 50.0f }, + { float32_t2(100.0f, -200.0f), 90.0f }, + { float32_t2(-125.0f, -70.1f), 10.0f }, + { float32_t2(25.0f, -70.1f), 50.0f }, + { float32_t2(175.0f, -70.1f), 90.0f }, + { float32_t2(-200.0f, 59.8f), 10.0f }, + { float32_t2(-50.0f, 59.8f), 50.0f }, + { float32_t2(100.0f, 59.8f), 90.0f }, + { float32_t2(-125.0f, 189.7f), 10.0f }, + { float32_t2(25.0f, 189.7f), 50.0f }, + { float32_t2(175.0f, 189.7f), 90.0f } }; core::vector indices = { diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index dc47604ad..73121fe36 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ 
b/62_CAD/shaders/main_pipeline/common.hlsl @@ -104,8 +104,8 @@ struct PSInput void setCurrentWorldToScreenRatio(float worldToScreen) { interp_data5.y = worldToScreen; } float getCurrentWorldToScreenRatio() { return interp_data5.y; } - void setHeightAtMeshVertex(float height) { interp_data5.x = height; } - float getHeightAtMeshVertex() { return interp_data5.x; } + void setHeight(float height) { interp_data5.x = height; } + float getHeight() { return interp_data5.x; } /* LINE */ float2 getLineStart() { return data2.xy; } diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 4449d4687..5311fa48d 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -414,12 +414,31 @@ float4 fragMain(PSInput input) : SV_TARGET const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); const MainObject mainObj = mainObjects[currentMainObjectIdx]; + + // TRIANGLE RENDERING { float3 v0 = input.getScreenSpaceVertexPos(0); float3 v1 = input.getScreenSpaceVertexPos(1); float3 v2 = input.getScreenSpaceVertexPos(2); + // CONTOUR + + // TODO: move to ubo or push constants + const float startHeight = 10.0f; + const float endHeight = 100.0f; + const float interval = 10.0f; + float height = input.getHeight(); + + // TODO: it actually can output a negative number, fix + int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight + (interval * 0.5f)) / interval); + + float backgroundColor = contourLineIdx; + backgroundColor *= 0.1f; + textureColor = float3(backgroundColor, backgroundColor, backgroundColor); + + // OUTLINE + float2 start; float2 end; const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; @@ -469,13 +488,17 @@ float4 fragMain(PSInput input) : SV_TARGET else { nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - printf("stretch = %f, worldToScreenRatio = %f", 
stretch, worldToScreenRatio); LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, outlineStyle.isRoadStyleFlag, clipper); } localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); - textureColor = float3(outlineStyle.color.x, outlineStyle.color.y, outlineStyle.color.z); + + // TODO: remove, this is just a hack to draw background + if (localAlpha < 0.00001) + localAlpha = 1.0f; + else + textureColor = float3(outlineStyle.color.x, outlineStyle.color.y, outlineStyle.color.z); } return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index e68ad1dec..a1788a91e 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -120,7 +120,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.position.xy = transformedPos; outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); - outV.setHeightAtMeshVertex(vtx.height); + outV.setHeight(vtx.height); outV.setScreenSpaceVertexPos(float3(transformedPos, 1)); outV.setCurrentWorldToScreenRatio( _static_cast((_static_cast(2.0f) / From 8900f9cf5b2f4c8d424aabbaf5237a371957e21f Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 26 Mar 2025 11:18:25 +0100 Subject: [PATCH 009/129] Implemented height shading --- 62_CAD/CTriangleMesh.h | 52 ++- 62_CAD/DrawResourcesFiller.cpp | 25 +- 62_CAD/main.cpp | 69 +++- 62_CAD/shaders/globals.hlsl | 33 +- 62_CAD/shaders/main_pipeline/common.hlsl | 12 +- .../main_pipeline/fragment_shader.hlsl | 299 ++++++++++++++---- .../shaders/main_pipeline/vertex_shader.hlsl | 11 +- 7 files changed, 422 insertions(+), 79 deletions(-) diff --git 
a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index d71198005..a6a86472e 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -8,11 +8,61 @@ using namespace nbl; struct DTMSettingsInfo { + enum E_HEIGHT_SHADING_MODE + { + DISCRETE_VARIABLE_LENGTH_INTERVALS, + DISCRETE_FIXED_LENGTH_INTERVALS, + CONTINOUS_INTERVALS + }; + LineStyleInfo outlineLineStyleInfo; LineStyleInfo contourLineStyleInfo; - // TODO: heights + + float contourLinesStartHeight; + float contourLinesEndHeight; + float contourLinesHeightInterval; + + float minShadingHeight; + float maxShadingHeight; + float intervalWidth; + E_HEIGHT_SHADING_MODE heightShadingMode; + + void addHeightColorMapEntry(uint32_t height, float32_t3 color) + { + heightColorSet.emplace(height, color); + } + + bool fillShaderDTMSettingsHeightColorMap(DTMSettings& dtmSettings) const + { + const uint32_t mapSize = heightColorSet.size(); + if (mapSize > DTMSettings::HeightColorMapMaxEntries) + return false; + dtmSettings.heightColorEntryCount = mapSize; + + int index = 0; + for (auto it = heightColorSet.begin(); it != heightColorSet.end(); ++it) + { + dtmSettings.heightColorMapHeights[index] = it->height; + dtmSettings.heightColorMapColors[index] = it->color; + ++index; + } + + return true; + } +private: + struct HeightColor + { + uint32_t height; + float32_t3 color; + + bool operator<(const HeightColor& other) const + { + return height < other.height; + } + }; + std::set heightColorSet; }; class CTriangleMesh final diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 49c81f3ff..0611f5900 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -790,12 +790,34 @@ uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineSty uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { DTMSettings dtmSettings; + dtmSettings.contourLinesStartHeight = 
dtmSettingsInfo.contourLinesStartHeight; + dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourLinesEndHeight; + dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourLinesHeightInterval; // TODO: this needs to be redone.. what if submit happens after that line? // we need to make sure somehow that function below will not submit, we need both outline and contour styles in GPU memory dtmSettings.outlineLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.outlineLineStyleInfo, intendedNextSubmit); dtmSettings.contourLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.contourLineStyleInfo, intendedNextSubmit); + dtmSettings.minShadingHeight = dtmSettingsInfo.minShadingHeight; + dtmSettings.maxShadingHeight = dtmSettingsInfo.maxShadingHeight; + switch (dtmSettingsInfo.heightShadingMode) + { + case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + dtmSettings.intervalWidth = std::numeric_limits::infinity(); + break; + case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + dtmSettings.intervalWidth = dtmSettingsInfo.intervalWidth; + break; + case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + dtmSettings.intervalWidth = 0.0f; + break; + } + _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); + + if (currentDTMSettingsCount >= maxDtmSettings) + return InvalidDTMSettingsIdx; + DTMSettings* settingsArray = reinterpret_cast(cpuDrawBuffers.dtmSettingsBuffer->getPointer()); for (uint32_t i = 0u; i < currentDTMSettingsCount; ++i) { @@ -804,9 +826,6 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm return i; } - if (currentDTMSettingsCount >= maxDtmSettings) - return InvalidDTMSettingsIdx; - void* dst = settingsArray + currentDTMSettingsCount; memcpy(dst, &dtmSettings, sizeof(DTMSettings)); return currentDTMSettingsCount++; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index da3c93acd..53ec24dca 100644 --- 
a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -72,6 +72,7 @@ constexpr std::array cameraExtents = 10.0, // CASE_6 10.0, // CASE_7 600.0, // CASE_8 + 600.0 // CASE_9 }; constexpr ExampleMode mode = ExampleMode::CASE_9; @@ -3284,7 +3285,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else if (mode == ExampleMode::CASE_9) { - core::vector vertices = { + /*core::vector vertices = { { float32_t2(-200.0f, -200.0f), 10.0f }, { float32_t2(-50.0f, -200.0f), 50.0f }, { float32_t2(100.0f, -200.0f), 90.0f }, @@ -3312,30 +3313,80 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu 7, 9, 10, 7, 8, 10, 8, 10, 11 + };*/ + + core::vector vertices = { + { float32_t2(0.0f, 0.0f), 100.0f }, + { float32_t2(-200.0f, -200.0f), 10.0f }, + { float32_t2(200.0f, -200.0f), 10.0f }, + { float32_t2(200.0f, 200.0f), -20.0f }, + { float32_t2(-200.0f, 200.0f), 10.0f }, }; - // TODO: height color map - //core::unordered_map heightColorMap; - //heightColorMap.insert({ 0.0f, {0.0f, 1.0f, 0.0f} }); - //heightColorMap.insert({ 100.0f, {0.0f, 1.0f, 0.0f} }); + core::vector indices = { + 0, 1, 2, + 0, 2, 3, + 0, 3, 4, + 0, 4, 1 + }; CTriangleMesh mesh; mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmSettingsInfo; + dtmSettingsInfo.contourLinesStartHeight = 20; + dtmSettingsInfo.contourLinesEndHeight = 90; + dtmSettingsInfo.contourLinesHeightInterval = 10; LineStyleInfo outlineStyle = {}; dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 2.0f; - dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.5f, 0.5f, 1.0f); + dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 3.0f; + dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 2.0f, -5.0f }; 
dtmSettingsInfo.outlineLineStyleInfo.setStipplePatternData(outlineStipplePattern); LineStyleInfo contourStyle = {}; dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmSettingsInfo.contourLineStyleInfo.worldSpaceLineWidth = 5.0f; - dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(1.0f, 0.5f, 0.31f, 1.0f); + dtmSettingsInfo.contourLineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 1.0f); + std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; + dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); + + //DTMSettingsInfo::E_HEIGHT_SHADING_MODE shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + //DTMSettingsInfo::E_HEIGHT_SHADING_MODE shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + DTMSettingsInfo::E_HEIGHT_SHADING_MODE shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + + // DISCRETE_VARIABLE_LENGTH_INTERVALS + + switch (shadingModeExample) + { + case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmSettingsInfo.minShadingHeight = 20.0f; + dtmSettingsInfo.maxShadingHeight = 70.0f; + dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + dtmSettingsInfo.addHeightColorMapEntry(30, float32_t3(0.5f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(45, float32_t3(0.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(60, float32_t3(1.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(80, float32_t3(1.0f, 0.0f, 0.0f)); + break; + } + case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + break; + } + case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmSettingsInfo.minShadingHeight = -10.0f; + dtmSettingsInfo.maxShadingHeight = 100.0f; + 
dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmSettingsInfo.addHeightColorMapEntry(20, float32_t3(0.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(50, float32_t3(1.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(80, float32_t3(1.0f, 0.0f, 0.0f)); + break; + } + } drawResourcesFiller.drawTriangleMesh(mesh, m_triangleMeshDrawData, dtmSettingsInfo, intendedNextSubmit); } diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index e7029a79e..d718ee76a 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -330,10 +330,39 @@ struct LineStyle struct DTMSettings { + const static uint32_t HeightColorMapMaxEntries = 16u; uint32_t outlineLineStyleIdx; // index into line styles uint32_t contourLineStyleIdx; // index into line styles - // TODO: - // ContourSettings -> min, max, interval + + // contour lines + float contourLinesStartHeight; + float contourLinesEndHeight; + float contourLinesHeightInterval; + + // height-color map + float minShadingHeight; + float maxShadingHeight; + float intervalWidth; + uint32_t heightColorEntryCount; + float heightColorMapHeights[HeightColorMapMaxEntries]; + float32_t3 heightColorMapColors[HeightColorMapMaxEntries]; + + enum E_HEIGHT_SHADING_MODE + { + DISCRETE_VARIABLE_LENGTH_INTERVALS, + DISCRETE_FIXED_LENGTH_INTERVALS, + CONTINOUS_INTERVALS + }; + + E_HEIGHT_SHADING_MODE determineHeightShadingMode() + { + if (nbl::hlsl::isinf(intervalWidth)) + return DISCRETE_VARIABLE_LENGTH_INTERVALS; + if (intervalWidth == 0.0f) + return CONTINOUS_INTERVALS; + + return DISCRETE_FIXED_LENGTH_INTERVALS; + } }; #ifndef __HLSL_VERSION inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 73121fe36..b2fcda9c2 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -104,9 +104,6 @@ 
struct PSInput void setCurrentWorldToScreenRatio(float worldToScreen) { interp_data5.y = worldToScreen; } float getCurrentWorldToScreenRatio() { return interp_data5.y; } - void setHeight(float height) { interp_data5.x = height; } - float getHeight() { return interp_data5.x; } - /* LINE */ float2 getLineStart() { return data2.xy; } float2 getLineEnd() { return data2.zw; } @@ -219,6 +216,15 @@ struct PSInput /* TRIANGLE MESH */ + float getOutlineThickness() { return asfloat(data1.z); } + float getContourLineThickness() { return asfloat(data1.w); } + + void setOutlineThickness(float lineThickness) { data1.z = asuint(lineThickness); } + void setContourLineThickness(float stretch) { data1.w = asuint(stretch); } + + void setHeight(float height) { interp_data5.x = height; } + float getHeight() { return interp_data5.x; } + #ifndef FRAGMENT_SHADER_INPUT // vertex shader void setScreenSpaceVertexPos(float3 pos) { vertexScreenSpacePos = pos; } #else // fragment shader diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 5311fa48d..2173ae50f 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -414,91 +414,276 @@ float4 fragMain(PSInput input) : SV_TARGET const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); const MainObject mainObj = mainObjects[currentMainObjectIdx]; - - // TRIANGLE RENDERING { - float3 v0 = input.getScreenSpaceVertexPos(0); - float3 v1 = input.getScreenSpaceVertexPos(1); - float3 v2 = input.getScreenSpaceVertexPos(2); + const float outlineThickness = input.getOutlineThickness(); + const float contourThickness = input.getContourLineThickness(); + const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); + const float stretch = 1.0f; // TODO: figure out what is it for + const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); + + DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; 
+ LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; + LineStyle contourStyle = lineStyles[dtmSettings.contourLineStyleIdx]; + + float3 v[3]; + v[0] = input.getScreenSpaceVertexPos(0); + v[1] = input.getScreenSpaceVertexPos(1); + v[2] = input.getScreenSpaceVertexPos(2); + + const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; + + // indices of points constructing every edge + uint2 edgePoints[3]; + edgePoints[0] = uint2(0, 1); + edgePoints[1] = uint2(1, 2); + edgePoints[2] = uint2(2, 0); + + // index of vertex opposing an edge, needed for calculation of triangle heights + uint opposingVertexIdx[3]; + opposingVertexIdx[0] = 2; + opposingVertexIdx[1] = 0; + opposingVertexIdx[2] = 1; + + float height = input.getHeight(); + + // HEIGHT SHADING + const bool isHeightBetweenMinAndMax = height >= dtmSettings.minShadingHeight && height <= dtmSettings.maxShadingHeight; + const bool isHeightColorMapNotEmpty = dtmSettings.heightColorEntryCount; + if (isHeightColorMapNotEmpty && isHeightBetweenMinAndMax) + { + DTMSettings::E_HEIGHT_SHADING_MODE mode = dtmSettings.determineHeightShadingMode(); + switch (mode) + { + case DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; + for (int i = 0; i < heightMapSize; ++i) + { + if (dtmSettings.heightColorMapHeights[i] > height) + { + textureColor = dtmSettings.heightColorMapColors[i]; + break; + } + } + + localAlpha = 1.0f; + break; + } + case DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + /*const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; + uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; + uint32_t lowerBoundHeightIndex; + // TODO: binary search + for (int i = 0; i < heightMapSize; ++i) + { + if (dtmSettings.heightColorMapHeights[i] > height) + { + upperBoundHeightIndex = i; + lowerBoundHeightIndex = i == 0 ? 
0 : i - 1; + break; + } + } + + if (upperBoundHeightIndex != nbl::hlsl::numeric_limits::max) + { + float upperBoundHeight = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + float lowerBoundHeight = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + + + float3 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + float3 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + + localAlpha = 1.0f; + }*/ + + break; + } + case DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + + const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; + uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; + uint32_t lowerBoundHeightIndex; + // TODO: binary search + for (int i = 0; i < heightMapSize; ++i) + { + if (dtmSettings.heightColorMapHeights[i] > height) + { + upperBoundHeightIndex = i; + lowerBoundHeightIndex = i; + if (i != 0) + --lowerBoundHeightIndex; + + break; + } + } + if (upperBoundHeightIndex == nbl::hlsl::numeric_limits::max) + { + upperBoundHeightIndex = heightMapSize - 1; + lowerBoundHeightIndex = upperBoundHeightIndex; + if (upperBoundHeightIndex != 0) + --lowerBoundHeightIndex; + } + + if (upperBoundHeightIndex != nbl::hlsl::numeric_limits::max) + { + float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; + + float3 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + float3 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + + float interpolationVal; + if (upperBoundHeightIndex == 0) + interpolationVal = 1.0f; + else + interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + + printf("idx = %i, t = %f, up = %f, lo = %f", upperBoundHeightIndex, interpolationVal, upperBoundHeight, lowerBoundHeight); + + textureColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + + 
localAlpha = 1.0f; + } + + break; + } + } + } // CONTOUR // TODO: move to ubo or push constants - const float startHeight = 10.0f; - const float endHeight = 100.0f; - const float interval = 10.0f; - float height = input.getHeight(); + const float startHeight = dtmSettings.contourLinesStartHeight; + const float endHeight = dtmSettings.contourLinesEndHeight; + const float interval = dtmSettings.contourLinesHeightInterval; + + // TODO: can be precomputed + const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; // TODO: it actually can output a negative number, fix int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight + (interval * 0.5f)) / interval); + contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); + float contourLineHeight = startHeight + interval * contourLineIdx; + + int contourLinePointsIdx = 0; + float2 contourLinePoints[2]; + // TODO: case where heights we are looking for are on all three vertices + for (int i = 0; i < 3; ++i) + { + if (contourLinePointsIdx == 3) + break; - float backgroundColor = contourLineIdx; - backgroundColor *= 0.1f; - textureColor = float3(backgroundColor, backgroundColor, backgroundColor); + const uint2 currentEdgePoints = edgePoints[i]; + float3 p0 = v[currentEdgePoints[0]]; + float3 p1 = v[currentEdgePoints[1]]; - // OUTLINE + if (p1.z < p0.z) + nbl::hlsl::swap(p0, p1); - float2 start; - float2 end; - const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; + float minHeight = p0.z; + float maxHeight = p1.z; - // TODO: figure out if branching can be reduced - // finding line start and end points by excluding vertex with the lowest barycentric coordinate value - if (baryCoord.x < baryCoord.y && baryCoord.x < baryCoord.z) - { - start = float2(v1.x, v1.y); - end = float2(v2.x, v2.y); - } - else if (baryCoord.y < baryCoord.x && baryCoord.y < baryCoord.z) - { - start = float2(v0.x, v0.y); - end = float2(v2.x, v2.y); + if (height >= minHeight && height <= maxHeight) + { + float2 edge = 
float2(p1.x, p1.y) - float2(p0.x, p0.y); + float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight); + + contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y); + ++contourLinePointsIdx; + } } - else if (baryCoord.z < baryCoord.x && baryCoord.z < baryCoord.y) + { - start = float2(v0.x, v0.y); - end = float2(v1.x, v1.y); + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); + + float distance = nbl::hlsl::numeric_limits::max; + if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag); + } + else + { + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper); + } + + float contourLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); + textureColor = lerp(textureColor, contourStyle.color.rgb, contourLocalAlpha); + localAlpha = max(localAlpha, contourLocalAlpha); } - // long story short, in order for stipple patterns to be consistent: - // - point with lesser x coord should be starting point - // - if x coord of both points are equal then point with lesser y value should be starting point - if (end.x < start.x) - nbl::hlsl::swap(start, end); - else if (end.x == start.x && end.y < start.y) - nbl::hlsl::swap(start, end); + - const float thickness = input.getLineThickness(); - const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float stretch = 1.0f; - const float worldToScreenRatio = 
input.getCurrentWorldToScreenRatio(); + // OUTLINE - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); + // find sdf of every edge + float triangleAreaTimesTwo; + { + float3 AB = v[0] - v[1]; + float3 AC = v[0] - v[2]; + AB.z = 0.0f; + AC.z = 0.0f; - DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; - LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; - LineStyle contourStyle = lineStyles[dtmSettings.contourLineStyleIdx]; + // TODO: figure out if there is a faster solution + triangleAreaTimesTwo = length(cross(AB, AC)); + } - float distance = nbl::hlsl::numeric_limits::max; + // calculate sdf of every edge as it wasn't stippled + float distances[3]; + for (int i = 0; i < 3; ++i) + { + const uint2 currentEdgePoints = edgePoints[i]; + float3 A = v[currentEdgePoints[0]]; + float3 B = v[currentEdgePoints[1]]; + float3 AB = B - A; + float ABLen = length(AB); + + distances[i] = (triangleAreaTimesTwo / ABLen) * baryCoord[opposingVertexIdx[i]]; + } + + float minDistance = nbl::hlsl::numeric_limits::max; if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, outlineStyle.isRoadStyleFlag); + for (uint i = 0; i < 3; ++i) + distances[i] -= outlineThickness; + + minDistance = min(distances[0], min(distances[1], distances[2])); } else { - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, outlineStyle.isRoadStyleFlag, clipper); - } + for (int i = 0; i < 3; ++i) + { + if (distances[i] > outlineThickness) + continue; - localAlpha = smoothstep(+globals.antiAliasingFactor, 
-globals.antiAliasingFactor, distance); + const uint2 currentEdgePoints = edgePoints[i]; + float3 p0 = v[currentEdgePoints[0]]; + float3 p1 = v[currentEdgePoints[1]]; - // TODO: remove, this is just a hack to draw background - if (localAlpha < 0.00001) - localAlpha = 1.0f; - else - textureColor = float3(outlineStyle.color.x, outlineStyle.color.y, outlineStyle.color.z); + if (p1.x < p0.x) + nbl::hlsl::swap(p0, p1); + else if (p1.x == p0.x && p1.y < p0.y) + nbl::hlsl::swap(p0, p1); + + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + + } + + float outlineLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, minDistance); + textureColor = lerp(textureColor, outlineStyle.color.rgb, outlineLocalAlpha); + localAlpha = max(localAlpha, outlineLocalAlpha); } return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index a1788a91e..2853d9a52 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -121,7 +121,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.position.xy = transformedPos; outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); outV.setHeight(vtx.height); - outV.setScreenSpaceVertexPos(float3(transformedPos, 
1)); + outV.setScreenSpaceVertexPos(float3(transformedPos, vtx.height)); outV.setCurrentWorldToScreenRatio( _static_cast((_static_cast(2.0f) / (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) @@ -131,9 +131,12 @@ PSInput main(uint vertexID : SV_VertexID) DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; LineStyle contourStyle = lineStyles[dtmSettings.contourLineStyleIdx]; - const float screenSpaceLineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float sdfLineThickness = screenSpaceLineWidth * 0.5f; - outV.setLineThickness(sdfLineThickness); + const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; + const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float sdfContourLineThickness = screenSpaceContourLineWidth * 0.5f; + outV.setOutlineThickness(sdfOutlineThickness); + outV.setContourLineThickness(sdfContourLineThickness); return outV; From 5a87097970d77f93ff85a1242f3f1c55623c38f3 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 26 Mar 2025 13:30:28 +0100 Subject: [PATCH 010/129] Fixes --- 62_CAD/CTriangleMesh.h | 6 +- 62_CAD/DrawResourcesFiller.cpp | 3 - 62_CAD/main.cpp | 49 +++--- 62_CAD/shaders/globals.hlsl | 2 - .../main_pipeline/fragment_shader.hlsl | 146 ++++++++---------- 5 files changed, 94 insertions(+), 112 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index a6a86472e..6711011ea 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -22,12 +22,10 @@ struct DTMSettingsInfo float contourLinesEndHeight; float 
contourLinesHeightInterval; - float minShadingHeight; - float maxShadingHeight; float intervalWidth; E_HEIGHT_SHADING_MODE heightShadingMode; - void addHeightColorMapEntry(uint32_t height, float32_t3 color) + void addHeightColorMapEntry(float height, float32_t3 color) { heightColorSet.emplace(height, color); } @@ -53,7 +51,7 @@ struct DTMSettingsInfo private: struct HeightColor { - uint32_t height; + float height; float32_t3 color; bool operator<(const HeightColor& other) const diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 0611f5900..5e0c85260 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -798,9 +798,6 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm // we need to make sure somehow that function below will not submit, we need both outline and contour styles in GPU memory dtmSettings.outlineLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.outlineLineStyleInfo, intendedNextSubmit); dtmSettings.contourLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.contourLineStyleInfo, intendedNextSubmit); - - dtmSettings.minShadingHeight = dtmSettingsInfo.minShadingHeight; - dtmSettings.maxShadingHeight = dtmSettingsInfo.maxShadingHeight; switch (dtmSettingsInfo.heightShadingMode) { case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 53ec24dca..ba2bf0da0 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -644,6 +644,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu double m_timeElapsed = 0.0; std::chrono::steady_clock::time_point lastTime; uint32_t m_hatchDebugStep = 0u; + DTMSettingsInfo::E_HEIGHT_SHADING_MODE m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + inline bool onAppInitialized(smart_refctd_ptr&& system) override { @@ -1161,6 +1163,18 @@ class ComputerAidedDesign final : 
public examples::SimpleWindowedApplication, pu { m_hatchDebugStep--; } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_1) + { + m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_2) + { + m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_3) + { + m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + } } } , m_logger.get()); @@ -3353,37 +3367,36 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); - //DTMSettingsInfo::E_HEIGHT_SHADING_MODE shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - //DTMSettingsInfo::E_HEIGHT_SHADING_MODE shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - DTMSettingsInfo::E_HEIGHT_SHADING_MODE shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - - // DISCRETE_VARIABLE_LENGTH_INTERVALS - - switch (shadingModeExample) + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + switch (m_shadingModeExample) { case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: { - dtmSettingsInfo.minShadingHeight = 20.0f; - dtmSettingsInfo.maxShadingHeight = 70.0f; dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - 
dtmSettingsInfo.addHeightColorMapEntry(30, float32_t3(0.5f, 1.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(45, float32_t3(0.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(60, float32_t3(1.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(80, float32_t3(1.0f, 0.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t3(0.5f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t3(0.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t3(1.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(80.0f, float32_t3(1.0f, 0.0f, 0.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: { + dtmSettingsInfo.intervalWidth = 8.0f; + dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t3(0.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t3(1.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t3(1.0f, 0.0f, 0.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { - dtmSettingsInfo.minShadingHeight = -10.0f; - dtmSettingsInfo.maxShadingHeight = 100.0f; dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(20, float32_t3(0.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(50, float32_t3(1.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(80, float32_t3(1.0f, 0.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t3(0.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(30.0f, float32_t3(1.0f, 1.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t3(1.0f, 0.0f, 0.0f)); break; } } diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index d718ee76a..7a05819ac 100644 --- a/62_CAD/shaders/globals.hlsl +++ 
b/62_CAD/shaders/globals.hlsl @@ -340,8 +340,6 @@ struct DTMSettings float contourLinesHeightInterval; // height-color map - float minShadingHeight; - float maxShadingHeight; float intervalWidth; uint32_t heightColorEntryCount; float heightColorMapHeights[HeightColorMapMaxEntries]; diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 2173ae50f..ad8a35c82 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -444,112 +444,88 @@ float4 fragMain(PSInput input) : SV_TARGET opposingVertexIdx[0] = 2; opposingVertexIdx[1] = 0; opposingVertexIdx[2] = 1; - + float height = input.getHeight(); // HEIGHT SHADING - const bool isHeightBetweenMinAndMax = height >= dtmSettings.minShadingHeight && height <= dtmSettings.maxShadingHeight; - const bool isHeightColorMapNotEmpty = dtmSettings.heightColorEntryCount; + const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; + float minShadingHeight = dtmSettings.heightColorMapHeights[0]; + float maxShadingHeight = dtmSettings.heightColorMapHeights[heightMapSize - 1]; + + printf("min = %f, max = %f", minShadingHeight, maxShadingHeight); + + const bool isHeightBetweenMinAndMax = height >= minShadingHeight && height <= maxShadingHeight; + const bool isHeightColorMapNotEmpty = heightMapSize > 0; if (isHeightColorMapNotEmpty && isHeightBetweenMinAndMax) { DTMSettings::E_HEIGHT_SHADING_MODE mode = dtmSettings.determineHeightShadingMode(); - switch (mode) + + if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { - case DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + for (int i = 0; i < heightMapSize; ++i) { - const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; - for (int i = 0; i < heightMapSize; ++i) + if (dtmSettings.heightColorMapHeights[i] > height) { - if (dtmSettings.heightColorMapHeights[i] > height) - { - textureColor = 
dtmSettings.heightColorMapColors[i]; - break; - } + textureColor = dtmSettings.heightColorMapColors[i]; + break; } - - localAlpha = 1.0f; - break; } - case DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: - { - /*const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; - uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; - uint32_t lowerBoundHeightIndex; - // TODO: binary search - for (int i = 0; i < heightMapSize; ++i) - { - if (dtmSettings.heightColorMapHeights[i] > height) - { - upperBoundHeightIndex = i; - lowerBoundHeightIndex = i == 0 ? 0 : i - 1; - break; - } - } - - if (upperBoundHeightIndex != nbl::hlsl::numeric_limits::max) - { - float upperBoundHeight = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - float lowerBoundHeight = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - - - float3 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - float3 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; - localAlpha = 1.0f; - }*/ - - break; + localAlpha = 1.0f; + } + else + { + float heightTmp; + if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + { + float interval = dtmSettings.intervalWidth; + int sectionIndex = int((height - minShadingHeight) / interval); + heightTmp = minShadingHeight + float(sectionIndex) * interval; } - case DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) { + heightTmp = height; + } - const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; - uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; - uint32_t lowerBoundHeightIndex; - // TODO: binary search - for (int i = 0; i < heightMapSize; ++i) - { - if (dtmSettings.heightColorMapHeights[i] > height) - { - upperBoundHeightIndex = i; - lowerBoundHeightIndex = i; - if (i != 0) - --lowerBoundHeightIndex; - break; - } - } - if (upperBoundHeightIndex == 
nbl::hlsl::numeric_limits::max) + const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; + uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; + uint32_t lowerBoundHeightIndex; + // TODO: binary search + for (int i = 0; i < heightMapSize; ++i) + { + if (dtmSettings.heightColorMapHeights[i] > heightTmp) { - upperBoundHeightIndex = heightMapSize - 1; - lowerBoundHeightIndex = upperBoundHeightIndex; - if (upperBoundHeightIndex != 0) + upperBoundHeightIndex = i; + lowerBoundHeightIndex = i; + if (i != 0) --lowerBoundHeightIndex; - } - - if (upperBoundHeightIndex != nbl::hlsl::numeric_limits::max) - { - float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; - - float3 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - float3 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; - - float interpolationVal; - if (upperBoundHeightIndex == 0) - interpolationVal = 1.0f; - else - interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - printf("idx = %i, t = %f, up = %f, lo = %f", upperBoundHeightIndex, interpolationVal, upperBoundHeight, lowerBoundHeight); - - textureColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); - - localAlpha = 1.0f; + break; } - - break; } + if (upperBoundHeightIndex == nbl::hlsl::numeric_limits::max) + { + upperBoundHeightIndex = heightMapSize - 1; + lowerBoundHeightIndex = upperBoundHeightIndex; + if (upperBoundHeightIndex != 0) + --lowerBoundHeightIndex; + } + + float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; + + float3 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + float3 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + + float interpolationVal; 
+ if (upperBoundHeightIndex == 0) + interpolationVal = 1.0f; + else + interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + + textureColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + localAlpha = 1.0f; } } From 3237e4b656aa5c7bb80ad08f24f67a1f1d35d3b1 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 26 Mar 2025 15:30:48 +0100 Subject: [PATCH 011/129] Implemented transparent height shading --- 62_CAD/CTriangleMesh.h | 4 +-- 62_CAD/main.cpp | 25 ++++++------- 62_CAD/shaders/globals.hlsl | 2 +- .../main_pipeline/fragment_shader.hlsl | 35 +++++++++---------- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 6711011ea..34fc243f7 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -25,7 +25,7 @@ struct DTMSettingsInfo float intervalWidth; E_HEIGHT_SHADING_MODE heightShadingMode; - void addHeightColorMapEntry(float height, float32_t3 color) + void addHeightColorMapEntry(float height, float32_t4 color) { heightColorSet.emplace(height, color); } @@ -52,7 +52,7 @@ struct DTMSettingsInfo struct HeightColor { float height; - float32_t3 color; + float32_t4 color; bool operator<(const HeightColor& other) const { diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index ba2bf0da0..bfd346022 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3356,14 +3356,14 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu LineStyleInfo outlineStyle = {}; dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 0.0f; dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 3.0f; - dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 0.5f); std::array outlineStipplePattern = { 0.0f, -5.0f, 2.0f, -5.0f }; dtmSettingsInfo.outlineLineStyleInfo.setStipplePatternData(outlineStipplePattern); LineStyleInfo contourStyle = 
{}; dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; dtmSettingsInfo.contourLineStyleInfo.worldSpaceLineWidth = 1.0f; - dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 1.0f); + dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); @@ -3376,27 +3376,28 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: { dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t3(0.5f, 1.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t3(0.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t3(1.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(80.0f, float32_t3(1.0f, 0.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(80.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: { dtmSettingsInfo.intervalWidth = 8.0f; dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t3(0.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t3(1.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t3(1.0f, 0.0f, 0.0f)); + float animatedAlpha = (std::cos(m_timeElapsed * 0.0003) + 1.0) * 0.5; + dtmSettingsInfo.addHeightColorMapEntry(0.0f, 
float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t3(0.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(30.0f, float32_t3(1.0f, 1.0f, 0.0f)); - dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t3(1.0f, 0.0f, 0.0f)); + dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(30.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } } diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 7a05819ac..84f9416e3 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -343,7 +343,7 @@ struct DTMSettings float intervalWidth; uint32_t heightColorEntryCount; float heightColorMapHeights[HeightColorMapMaxEntries]; - float32_t3 heightColorMapColors[HeightColorMapMaxEntries]; + float32_t4 heightColorMapColors[HeightColorMapMaxEntries]; enum E_HEIGHT_SHADING_MODE { diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index ad8a35c82..0d5ec486d 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -452,8 +452,6 @@ float4 fragMain(PSInput input) : SV_TARGET float minShadingHeight = dtmSettings.heightColorMapHeights[0]; float maxShadingHeight = dtmSettings.heightColorMapHeights[heightMapSize - 1]; - printf("min = %f, max = %f", minShadingHeight, maxShadingHeight); - const bool isHeightBetweenMinAndMax = height >= minShadingHeight && height <= 
maxShadingHeight; const bool isHeightColorMapNotEmpty = heightMapSize > 0; if (isHeightColorMapNotEmpty && isHeightBetweenMinAndMax) @@ -462,16 +460,24 @@ float4 fragMain(PSInput input) : SV_TARGET if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { + uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; + uint32_t lowerBoundHeightIndex; + // TODO: binary search for (int i = 0; i < heightMapSize; ++i) { if (dtmSettings.heightColorMapHeights[i] > height) { - textureColor = dtmSettings.heightColorMapColors[i]; + upperBoundHeightIndex = i; + lowerBoundHeightIndex = i; + if (i != 0) + --lowerBoundHeightIndex; + break; } } - localAlpha = 1.0f; + textureColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex].rgb; + localAlpha = dtmSettings.heightColorMapColors[upperBoundHeightIndex].a; } else { @@ -487,8 +493,6 @@ float4 fragMain(PSInput input) : SV_TARGET heightTmp = height; } - - const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; uint32_t lowerBoundHeightIndex; // TODO: binary search @@ -504,19 +508,12 @@ float4 fragMain(PSInput input) : SV_TARGET break; } } - if (upperBoundHeightIndex == nbl::hlsl::numeric_limits::max) - { - upperBoundHeightIndex = heightMapSize - 1; - lowerBoundHeightIndex = upperBoundHeightIndex; - if (upperBoundHeightIndex != 0) - --lowerBoundHeightIndex; - } float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; - float3 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - float3 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + float4 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; float interpolationVal; if (upperBoundHeightIndex == 0) @@ -524,8 
+521,8 @@ float4 fragMain(PSInput input) : SV_TARGET else interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - textureColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); - localAlpha = 1.0f; + textureColor = lerp(lowerBoundColor.rgb, upperBoundColor.rgb, interpolationVal); + localAlpha = lerp(lowerBoundColor.a, upperBoundColor.a, interpolationVal);; } } @@ -587,7 +584,7 @@ float4 fragMain(PSInput input) : SV_TARGET distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper); } - float contourLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); + float contourLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a; textureColor = lerp(textureColor, contourStyle.color.rgb, contourLocalAlpha); localAlpha = max(localAlpha, contourLocalAlpha); } @@ -657,7 +654,7 @@ float4 fragMain(PSInput input) : SV_TARGET } - float outlineLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, minDistance); + float outlineLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, minDistance) * outlineStyle.color.a; textureColor = lerp(textureColor, outlineStyle.color.rgb, outlineLocalAlpha); localAlpha = max(localAlpha, outlineLocalAlpha); } From 78c716c7b77099220955ef556f51dd4bca92912e Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 27 Mar 2025 13:40:54 +0100 Subject: [PATCH 012/129] Fixes --- 62_CAD/CTriangleMesh.h | 6 +- 62_CAD/DrawResourcesFiller.cpp | 31 ++++--- 62_CAD/main.cpp | 25 ++--- 62_CAD/shaders/globals.hlsl | 6 +- 62_CAD/shaders/main_pipeline/common.hlsl | 6 +- .../main_pipeline/fragment_shader.hlsl | 91 +++++++++---------- .../shaders/main_pipeline/vertex_shader.hlsl | 13 +-- 7 files changed, 88 insertions(+), 90 deletions(-) diff --git a/62_CAD/CTriangleMesh.h 
b/62_CAD/CTriangleMesh.h index 34fc243f7..374fae1b4 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -94,15 +94,15 @@ class CTriangleMesh final return m_indices; } - inline size_t getVtxBuffByteSize() const + inline size_t getVertexBuffByteSize() const { return sizeof(vertex_t) * m_vertices.size(); } - inline size_t getIdxBuffByteSize() const + inline size_t getIndexBuffByteSize() const { return sizeof(index_t) * m_indices.size(); } - inline size_t getIdxCnt() const + inline size_t getIndexCount() const { return m_indices.size(); } diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 5e0c85260..58c4d0c72 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -84,7 +84,7 @@ void DrawResourcesFiller::allocateGeometryBuffer(ILogicalDevice* logicalDevice, IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; - geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; + geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; // INDEX_BUFFER USAGE for DTMs gpuDrawBuffers.geometryBuffer = logicalDevice->createBuffer(std::move(geometryCreationParams)); gpuDrawBuffers.geometryBuffer->setObjectDebugName("geometryBuffer"); @@ -241,18 +241,18 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM ICPUBuffer::SCreationParams geometryBuffParams; // concatenate the index and vertex buffer into the geometry buffer - const size_t indexBuffByteSize = mesh.getIdxBuffByteSize(); - const size_t vtxBuffByteSize = mesh.getVtxBuffByteSize(); + const size_t indexBuffByteSize = mesh.getIndexBuffByteSize(); + const size_t vtxBuffByteSize = mesh.getVertexBuffByteSize(); const size_t 
geometryBufferDataToAddByteSize = indexBuffByteSize + vtxBuffByteSize; // copy into gemoetry cpu buffer insteaed // TODO: rename, its not just points - const uint32_t maxGeometryBufferPoints = static_cast(maxGeometryBufferSize - currentGeometryBufferSize); + const uint32_t remainingGeometryBufferSize = static_cast(maxGeometryBufferSize - currentGeometryBufferSize); - // TODO: assert of geometry buffer size, do i need to check if size of objects to be added <= maxGeometryBufferPoints? + // TODO: assert of geometry buffer size, do i need to check if size of objects to be added <= remainingGeometryBufferSize? // TODO: auto submit instead of assert - assert(geometryBufferDataToAddByteSize <= maxGeometryBufferPoints); + assert(geometryBufferDataToAddByteSize <= remainingGeometryBufferSize); // TODO: vertices need to be aligned to 8? uint64_t vtxBufferAddress; @@ -270,7 +270,7 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM currentGeometryBufferSize += vtxBuffByteSize; } - drawData.indexCount = mesh.getIdxCnt(); + drawData.indexCount = mesh.getIndexCount(); // call addMainObject_SubmitIfNeeded, use its index in push constants @@ -394,6 +394,7 @@ uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& l resetGeometryCounters(); resetMainObjectCounters(); resetLineStyleCounters(); + resetDTMSettingsCounters(); outLineStyleIdx = addLineStyle_Internal(lineStyle); assert(outLineStyleIdx != InvalidStyleIdx); } @@ -410,6 +411,7 @@ uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInf resetGeometryCounters(); resetMainObjectCounters(); resetLineStyleCounters(); + resetDTMSettingsCounters(); outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); assert(outDTMSettingIdx != InvalidDTMSettingsIdx); } @@ -538,9 +540,9 @@ bool DrawResourcesFiller::finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& inte bool DrawResourcesFiller::finalizeDTMSettingsCopiesToGPU(SIntendedSubmitInfo& 
intendedNextSubmit) { bool success = true; - // Copy LineStyles - uint32_t remainingLineStyles = currentDTMSettingsCount - inMemDTMSettingsCount; - SBufferRange dtmSettingsRange = { sizeof(DTMSettings) * inMemDTMSettingsCount, sizeof(DTMSettings) * remainingLineStyles, gpuDrawBuffers.dtmSettingsBuffer }; + // Copy DTM settings + uint32_t remainingDTMSettings = currentDTMSettingsCount - inMemDTMSettingsCount; + SBufferRange dtmSettingsRange = { sizeof(DTMSettings) * inMemDTMSettingsCount, sizeof(DTMSettings) * remainingDTMSettings, gpuDrawBuffers.dtmSettingsBuffer }; if (dtmSettingsRange.size > 0u) { const DTMSettings* srcDTMSettingsData = reinterpret_cast(cpuDrawBuffers.dtmSettingsBuffer->getPointer()) + inMemDTMSettingsCount; @@ -794,10 +796,13 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourLinesEndHeight; dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourLinesHeightInterval; - // TODO: this needs to be redone.. what if submit happens after that line? 
- // we need to make sure somehow that function below will not submit, we need both outline and contour styles in GPU memory + if (currentLineStylesCount + 2 > maxLineStyles) + return InvalidDTMSettingsIdx; + + assert(currentLineStylesCount + 2 <= maxLineStyles); dtmSettings.outlineLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.outlineLineStyleInfo, intendedNextSubmit); dtmSettings.contourLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.contourLineStyleInfo, intendedNextSubmit); + switch (dtmSettingsInfo.heightShadingMode) { case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: @@ -864,7 +869,7 @@ uint64_t DrawResourcesFiller::addClipProjectionData_Internal(const ClipProjectio if (maxGeometryBufferClipProjData <= 0) return InvalidClipProjectionAddress; - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; + uint8_t* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; memcpy(dst, &clipProjectionData, sizeof(ClipProjectionData)); const uint64_t ret = currentGeometryBufferSize + geometryBufferAddress; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index bfd346022..17afb122a 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -646,7 +646,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu uint32_t m_hatchDebugStep = 0u; DTMSettingsInfo::E_HEIGHT_SHADING_MODE m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - inline bool onAppInitialized(smart_refctd_ptr&& system) override { m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); @@ -3330,11 +3329,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu };*/ core::vector vertices = { - { float32_t2(0.0f, 0.0f), 100.0f }, - { float32_t2(-200.0f, -200.0f), 10.0f }, - { float32_t2(200.0f, -200.0f), 10.0f }, - { float32_t2(200.0f, 
200.0f), -20.0f }, - { float32_t2(-200.0f, 200.0f), 10.0f }, + { float32_t2(0.0, 0.0), 100.0 }, + { float32_t2(-200.0, -200.0), 10.0 }, + { float32_t2(200.0, -200.0), 10.0 }, + { float32_t2(200.0, 200.0), -20.0 }, + { float32_t2(-200.0, 200.0), 10.0 }, }; core::vector indices = { @@ -3376,20 +3375,22 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: { dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(1.0f, 1.0f, 1.0f, 1.0f)); dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(80.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: { dtmSettingsInfo.intervalWidth = 8.0f; dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - float animatedAlpha = (std::cos(m_timeElapsed * 0.0003) + 1.0) * 0.5; - dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(1.0f, 
1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 84f9416e3..8412b29ad 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -126,7 +126,7 @@ enum class MajorAxis : uint32_t struct MainObject { uint32_t styleIdx; - uint32_t dtmSettingsIdx; // do I even need this on the gpu side? it's stored in structured buffer not bda + uint32_t dtmSettingsIdx; uint64_t clipProjectionAddress; }; @@ -273,8 +273,8 @@ NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits: struct TriangleMeshVertex { - float32_t2 pos; - float32_t height; + pfloat64_t2 pos; + pfloat64_t height; }; // The color parameter is also used for styling non-curve objects such as text glyphs and hatches with solid color diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index b2fcda9c2..261e336f3 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -226,9 +226,9 @@ struct PSInput float getHeight() { return interp_data5.x; } #ifndef FRAGMENT_SHADER_INPUT // vertex shader - void setScreenSpaceVertexPos(float3 pos) { vertexScreenSpacePos = pos; } + void setScreenSpaceVertexAttribs(float3 pos) { vertexScreenSpacePos = pos; } #else // fragment shader - float3 getScreenSpaceVertexPos(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; } + float3 getScreenSpaceVertexAttribs(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; } #endif }; @@ -237,7 +237,7 @@ struct PSInput [[vk::binding(1, 0)]] StructuredBuffer drawObjects : register(t0); [[vk::binding(2, 0)]] StructuredBuffer mainObjects : register(t1); [[vk::binding(3, 0)]] StructuredBuffer lineStyles : register(t2); -[[vk::binding(4, 0)]] StructuredBuffer dtmSettingsBuff : register(t3); 
+[[vk::binding(4, 0)]] StructuredBuffer dtmSettings : register(t3); [[vk::combinedImageSampler]][[vk::binding(5, 0)]] Texture2DArray msdfTextures : register(t4); [[vk::combinedImageSampler]][[vk::binding(5, 0)]] SamplerState msdfSampler : register(s4); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 0d5ec486d..42a303fc2 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -334,6 +334,18 @@ float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; +// for usage in upper_bound function +struct DTMSettingsHeightsAccessor +{ + DTMSettings dtmSettings; + using value_type = float; + + float operator[](const uint32_t ix) + { + return dtmSettings.heightColorMapHeights[ix]; + } +}; + // We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm // because there is no `if constexpr` in hlsl // @params @@ -422,14 +434,14 @@ float4 fragMain(PSInput input) : SV_TARGET const float stretch = 1.0f; // TODO: figure out what is it for const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; - LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; - LineStyle contourStyle = lineStyles[dtmSettings.contourLineStyleIdx]; + DTMSettings dtm = dtmSettings[mainObj.dtmSettingsIdx]; + LineStyle outlineStyle = lineStyles[dtm.outlineLineStyleIdx]; + LineStyle contourStyle = lineStyles[dtm.contourLineStyleIdx]; float3 v[3]; - v[0] = input.getScreenSpaceVertexPos(0); - v[1] = input.getScreenSpaceVertexPos(1); - v[2] = input.getScreenSpaceVertexPos(2); + v[0] = input.getScreenSpaceVertexAttribs(0); + v[1] = 
input.getScreenSpaceVertexAttribs(1); + v[2] = input.getScreenSpaceVertexAttribs(2); const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; @@ -448,43 +460,31 @@ float4 fragMain(PSInput input) : SV_TARGET float height = input.getHeight(); // HEIGHT SHADING - const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; - float minShadingHeight = dtmSettings.heightColorMapHeights[0]; - float maxShadingHeight = dtmSettings.heightColorMapHeights[heightMapSize - 1]; + const uint32_t heightMapSize = dtm.heightColorEntryCount; + float minShadingHeight = dtm.heightColorMapHeights[0]; + float maxShadingHeight = dtm.heightColorMapHeights[heightMapSize - 1]; const bool isHeightBetweenMinAndMax = height >= minShadingHeight && height <= maxShadingHeight; const bool isHeightColorMapNotEmpty = heightMapSize > 0; if (isHeightColorMapNotEmpty && isHeightBetweenMinAndMax) { - DTMSettings::E_HEIGHT_SHADING_MODE mode = dtmSettings.determineHeightShadingMode(); + DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode(); if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { - uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; - uint32_t lowerBoundHeightIndex; - // TODO: binary search - for (int i = 0; i < heightMapSize; ++i) - { - if (dtmSettings.heightColorMapHeights[i] > height) - { - upperBoundHeightIndex = i; - lowerBoundHeightIndex = i; - if (i != 0) - --lowerBoundHeightIndex; + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; - break; - } - } - - textureColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex].rgb; - localAlpha = dtmSettings.heightColorMapColors[upperBoundHeightIndex].a; + textureColor = dtm.heightColorMapColors[upperBoundHeightIndex].rgb; + localAlpha = dtm.heightColorMapColors[upperBoundHeightIndex].a; } else { float heightTmp; if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) { - float interval = dtmSettings.intervalWidth; + float interval = dtm.intervalWidth; int sectionIndex = int((height - minShadingHeight) / interval); heightTmp = minShadingHeight + float(sectionIndex) * interval; } @@ -493,27 +493,15 @@ float4 fragMain(PSInput input) : SV_TARGET heightTmp = height; } - uint32_t upperBoundHeightIndex = nbl::hlsl::numeric_limits::max; - uint32_t lowerBoundHeightIndex; - // TODO: binary search - for (int i = 0; i < heightMapSize; ++i) - { - if (dtmSettings.heightColorMapHeights[i] > heightTmp) - { - upperBoundHeightIndex = i; - lowerBoundHeightIndex = i; - if (i != 0) - --lowerBoundHeightIndex; - - break; - } - } + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; - float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; + float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; - float4 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; float interpolationVal; if (upperBoundHeightIndex == 0) @@ -529,9 +517,9 @@ float4 fragMain(PSInput input) : SV_TARGET // CONTOUR // TODO: move to ubo or push constants - const float startHeight = dtmSettings.contourLinesStartHeight; - const float endHeight = dtmSettings.contourLinesEndHeight; - const float interval = dtmSettings.contourLinesHeightInterval; + const float startHeight = dtm.contourLinesStartHeight; + const float endHeight = dtm.contourLinesEndHeight; + const float interval = dtm.contourLinesHeightInterval; // TODO: can be precomputed const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; @@ -637,6 +625,9 @@ float4 fragMain(PSInput input) : SV_TARGET float3 p0 = v[currentEdgePoints[0]]; float3 p1 = v[currentEdgePoints[1]]; + // long story short, in order for stipple patterns to be consistent: + // - point with lesser x coord should be starting point + // - if x coord of both points are equal then point with lesser y value should be starting point if (p1.x < p0.x) nbl::hlsl::swap(p0, p1); else if (p1.x == p0.x && p1.y < p0.y) diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 2853d9a52..6011defce 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ 
b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -108,7 +108,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.setObjType(ObjectType::TRIANGLE_MESH); outV.setMainObjectIdx(pc.triangleMeshMainObjectIndex); - TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 4u); + TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); pfloat64_t2 vtxPos; vtxPos.x = _static_cast(vtx.pos.x); vtxPos.y = _static_cast(vtx.pos.y); @@ -120,17 +120,18 @@ PSInput main(uint vertexID : SV_VertexID) outV.position.xy = transformedPos; outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); - outV.setHeight(vtx.height); - outV.setScreenSpaceVertexPos(float3(transformedPos, vtx.height)); + const float heightAsFloat = nbl::hlsl::_static_cast(vtx.height); + outV.setHeight(heightAsFloat); + outV.setScreenSpaceVertexAttribs(float3(transformedPos, heightAsFloat)); outV.setCurrentWorldToScreenRatio( _static_cast((_static_cast(2.0f) / (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) ); // TODO: line style of contour line has to be set too! 
- DTMSettings dtmSettings = dtmSettingsBuff[mainObj.dtmSettingsIdx]; - LineStyle outlineStyle = lineStyles[dtmSettings.outlineLineStyleIdx]; - LineStyle contourStyle = lineStyles[dtmSettings.contourLineStyleIdx]; + DTMSettings dtm = dtmSettings[mainObj.dtmSettingsIdx]; + LineStyle outlineStyle = lineStyles[dtm.outlineLineStyleIdx]; + LineStyle contourStyle = lineStyles[dtm.contourLineStyleIdx]; const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); From 10791e9f48d5563b319b58a7ea47dbd19639abe2 Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 29 Mar 2025 15:28:51 +0100 Subject: [PATCH 013/129] Implemented anty aliasing between height shading sections --- 62_CAD/main.cpp | 11 ++++--- .../main_pipeline/fragment_shader.hlsl | 31 ++++++++++++++++--- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 17afb122a..9fdb4577a 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3377,11 +3377,12 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; - dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(1.0f, 1.0f, 1.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); 
+ dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + //dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 1.0f, 1.0f, 1.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 42a303fc2..225c0636e 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -473,11 +473,29 @@ float4 fragMain(PSInput input) : SV_TARGET if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; - uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1; + uint32_t mapIndexPlus1 = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + uint32_t mapIndex = mapIndexPlus1 == 0 ? 
mapIndexPlus1 : mapIndexPlus1 - 1; + + // logic explainer: if colorIdx is 0.0 then it means blend with next + // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true + // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values + bool blendWithPrev = (mapIndex > 0) + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[mapIndexPlus1] + dtm.heightColorMapHeights[mapIndex])); + float heightDeriv = fwidth(height); + if (blendWithPrev) + { + float pxDistanceToPrevHeight = (height - dtm.heightColorMapHeights[mapIndex]) / heightDeriv; + float prevColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToPrevHeight); + textureColor = lerp(dtm.heightColorMapColors[mapIndex - 1].rgb, dtm.heightColorMapColors[mapIndex].rgb, prevColorCoverage); + } + else + { + float pxDistanceToNextHeight = (height - dtm.heightColorMapHeights[mapIndexPlus1]) / heightDeriv; + float nextColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNextHeight); + textureColor = lerp(dtm.heightColorMapColors[mapIndex].rgb, dtm.heightColorMapColors[mapIndexPlus1].rgb, nextColorCoverage); + } - textureColor = dtm.heightColorMapColors[upperBoundHeightIndex].rgb; - localAlpha = dtm.heightColorMapColors[upperBoundHeightIndex].a; + localAlpha = dtm.heightColorMapColors[mapIndex].a; } else { @@ -567,6 +585,9 @@ float4 fragMain(PSInput input) : SV_TARGET } else { + // TODO: + // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! 
+ // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper); @@ -634,7 +655,7 @@ float4 fragMain(PSInput input) : SV_TARGET nbl::hlsl::swap(p0, p1); nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); - + float distance = nbl::hlsl::numeric_limits::max; nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); From 1d9e6d014e09d24ca4da7d83dbce8a206f9f084d Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sun, 30 Mar 2025 08:27:56 +0330 Subject: [PATCH 014/129] switch from dtm to good'ol linework --- 62_CAD/main.cpp | 2 +- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 3 ++- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 9fdb4577a..eff8fd3e1 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -75,7 +75,7 @@ constexpr std::array cameraExtents = 600.0 // CASE_9 }; -constexpr ExampleMode mode = ExampleMode::CASE_9; +constexpr ExampleMode mode = ExampleMode::CASE_4; class Camera2D { diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 225c0636e..cddac89ba 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -426,6 
+426,7 @@ float4 fragMain(PSInput input) : SV_TARGET const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); const MainObject mainObj = mainObjects[currentMainObjectIdx]; +#ifdef DTM // TRIANGLE RENDERING { const float outlineThickness = input.getOutlineThickness(); @@ -672,7 +673,7 @@ float4 fragMain(PSInput input) : SV_TARGET } return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); - +#endif // figure out local alpha with sdf if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) { diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 6011defce..f7af0d8a6 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -94,7 +94,6 @@ PSInput main(uint vertexID : SV_VertexID) // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. 
-#define DTM #ifdef DTM PSInput outV; From ad43e200d1f30f218d6b7a0e6fbf696311df1bbb Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 31 Mar 2025 10:36:31 +0330 Subject: [PATCH 015/129] [WIP] putting all data into a single buffer and addressing with BDA --- 62_CAD/DrawResourcesFiller.cpp | 329 +++++++----------- 62_CAD/DrawResourcesFiller.h | 196 ++++------- 62_CAD/main.cpp | 106 ++---- 62_CAD/shaders/globals.hlsl | 79 ++++- 62_CAD/shaders/main_pipeline/common.hlsl | 15 +- .../main_pipeline/fragment_shader.hlsl | 28 +- .../shaders/main_pipeline/resolve_alphas.hlsl | 4 +- .../shaders/main_pipeline/vertex_shader.hlsl | 27 +- 8 files changed, 308 insertions(+), 476 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 58c4d0c72..8c1a42719 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -15,70 +15,35 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) submitDraws = func; } -void DrawResourcesFiller::allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t maxIndices) -{ - maxIndexCount = maxIndices; - const size_t indexBufferSize = maxIndices * sizeof(index_buffer_type); - auto indexBuffer = ICPUBuffer::create({ indexBufferSize }); - - index_buffer_type* indices = reinterpret_cast(indexBuffer->getPointer()); - for (uint32_t i = 0u; i < maxIndices / 6u; ++i) - { - index_buffer_type objIndex = i; - indices[i * 6] = objIndex * 4u + 1u; - indices[i * 6 + 1u] = objIndex * 4u + 0u; - indices[i * 6 + 2u] = objIndex * 4u + 2u; - - indices[i * 6 + 3u] = objIndex * 4u + 1u; - indices[i * 6 + 4u] = objIndex * 4u + 2u; - indices[i * 6 + 5u] = objIndex * 4u + 3u; - } - - IGPUBuffer::SCreationParams indexBufferCreationParams = {}; - indexBufferCreationParams.size = indexBufferSize; - indexBufferCreationParams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - - 
m_utilities->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue=m_copyQueue}, std::move(indexBufferCreationParams), indices).move_into(gpuDrawBuffers.indexBuffer); - gpuDrawBuffers.indexBuffer->setObjectDebugName("indexBuffer"); -} - -void DrawResourcesFiller::allocateMainObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t mainObjects) -{ - maxMainObjects = mainObjects; - size_t mainObjectsBufferSize = maxMainObjects * sizeof(MainObject); - - IGPUBuffer::SCreationParams mainObjectsCreationParams = {}; - mainObjectsCreationParams.size = mainObjectsBufferSize; - mainObjectsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.mainObjectsBuffer = logicalDevice->createBuffer(std::move(mainObjectsCreationParams)); - gpuDrawBuffers.mainObjectsBuffer->setObjectDebugName("mainObjectsBuffer"); - - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.mainObjectsBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto mainObjectsBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.mainObjectsBuffer.get()); - - cpuDrawBuffers.mainObjectsBuffer = ICPUBuffer::create({ mainObjectsBufferSize }); -} - -void DrawResourcesFiller::allocateDrawObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t drawObjects) -{ - maxDrawObjects = drawObjects; - size_t drawObjectsBufferSize = maxDrawObjects * sizeof(DrawObject); - - IGPUBuffer::SCreationParams drawObjectsCreationParams = {}; - drawObjectsCreationParams.size = drawObjectsBufferSize; - drawObjectsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.drawObjectsBuffer = logicalDevice->createBuffer(std::move(drawObjectsCreationParams)); - gpuDrawBuffers.drawObjectsBuffer->setObjectDebugName("drawObjectsBuffer"); - - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = 
gpuDrawBuffers.drawObjectsBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto drawObjectsBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.drawObjectsBuffer.get()); - - cpuDrawBuffers.drawObjectsBuffer = ICPUBuffer::create({ drawObjectsBufferSize }); -} - -void DrawResourcesFiller::allocateGeometryBuffer(ILogicalDevice* logicalDevice, size_t size) +//void DrawResourcesFiller::allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t maxIndices) +//{ +// maxIndexCount = maxIndices; +// const size_t indexBufferSize = maxIndices * sizeof(index_buffer_type); +// auto indexBuffer = ICPUBuffer::create({ indexBufferSize }); +// +// index_buffer_type* indices = reinterpret_cast(indexBuffer->getPointer()); +// for (uint32_t i = 0u; i < maxIndices / 6u; ++i) +// { +// index_buffer_type objIndex = i; +// indices[i * 6] = objIndex * 4u + 1u; +// indices[i * 6 + 1u] = objIndex * 4u + 0u; +// indices[i * 6 + 2u] = objIndex * 4u + 2u; +// +// indices[i * 6 + 3u] = objIndex * 4u + 1u; +// indices[i * 6 + 4u] = objIndex * 4u + 2u; +// indices[i * 6 + 5u] = objIndex * 4u + 3u; +// } +// +// IGPUBuffer::SCreationParams indexBufferCreationParams = {}; +// indexBufferCreationParams.size = indexBufferSize; +// indexBufferCreationParams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; +// +// m_utilities->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue=m_copyQueue}, std::move(indexBufferCreationParams), indices).move_into(gpuDrawBuffers.indexBuffer); +// gpuDrawBuffers.indexBuffer->setObjectDebugName("indexBuffer"); +//} + + +void DrawResourcesFiller::allocateDrawResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { maxGeometryBufferSize = size; @@ -91,49 +56,11 @@ void DrawResourcesFiller::allocateGeometryBuffer(ILogicalDevice* logicalDevice, IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.geometryBuffer->getMemoryReqs(); 
memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); auto geometryBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.geometryBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - geometryBufferAddress = gpuDrawBuffers.geometryBuffer->getDeviceAddress(); + drawResourcesBDA = gpuDrawBuffers.geometryBuffer->getDeviceAddress(); cpuDrawBuffers.geometryBuffer = ICPUBuffer::create({ size }); } -void DrawResourcesFiller::allocateStylesBuffer(ILogicalDevice* logicalDevice, uint32_t lineStylesCount) -{ - { - maxLineStyles = lineStylesCount; - size_t lineStylesBufferSize = lineStylesCount * sizeof(LineStyle); - - IGPUBuffer::SCreationParams lineStylesCreationParams = {}; - lineStylesCreationParams.size = lineStylesBufferSize; - lineStylesCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.lineStylesBuffer = logicalDevice->createBuffer(std::move(lineStylesCreationParams)); - gpuDrawBuffers.lineStylesBuffer->setObjectDebugName("lineStylesBuffer"); - - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.lineStylesBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto stylesBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.lineStylesBuffer.get()); - - cpuDrawBuffers.lineStylesBuffer = ICPUBuffer::create({ lineStylesBufferSize }); - } -} - -void DrawResourcesFiller::allocateDTMSettingsBuffer(ILogicalDevice* logicalDevice, uint32_t dtmSettingsCount) -{ - maxDtmSettings = dtmSettingsCount; - size_t dtmSettingsBufferSize = dtmSettingsCount * sizeof(DTMSettings); - - IGPUBuffer::SCreationParams dtmSettingsCreationParams = {}; - dtmSettingsCreationParams.size = dtmSettingsBufferSize; - dtmSettingsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.dtmSettingsBuffer = 
logicalDevice->createBuffer(std::move(dtmSettingsCreationParams)); - gpuDrawBuffers.dtmSettingsBuffer->setObjectDebugName("dtmSettingsBuffer"); - - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.dtmSettingsBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto stylesBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.dtmSettingsBuffer.get()); - - cpuDrawBuffers.dtmSettingsBuffer = ICPUBuffer::create({ dtmSettingsBufferSize }); -} - void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) { msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); @@ -265,7 +192,7 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM currentGeometryBufferSize += indexBuffByteSize; dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - drawData.pushConstants.triangleMeshVerticesBaseAddress = geometryBufferAddress + currentGeometryBufferSize; + drawData.pushConstants.triangleMeshVerticesBaseAddress = drawResourcesBDA + currentGeometryBufferSize; memcpy(dst, mesh.getVertices().data(), vtxBuffByteSize); currentGeometryBufferSize += vtxBuffByteSize; } @@ -373,13 +300,56 @@ void DrawResourcesFiller::drawFontGlyph( } } +void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) +{ + auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool + { + const uint32_t maxGeometryBufferImageObjects = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ImageObjectInfo)); + uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; + uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); + uploadableObjects = core::min(uploadableObjects, 
maxGeometryBufferImageObjects); + + if (uploadableObjects >= 1u) + { + void* dstGeom = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; + memcpy(dstGeom, &imageObjectInfo, sizeof(ImageObjectInfo)); + uint64_t geomBufferAddr = drawResourcesBDA + currentGeometryBufferSize; + currentGeometryBufferSize += sizeof(ImageObjectInfo); + + DrawObject drawObj = {}; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.mainObjIndex = mainObjIdx; + drawObj.geometryAddress = geomBufferAddr; + void* dstDrawObj = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; + memcpy(dstDrawObj, &drawObj, sizeof(DrawObject)); + currentDrawObjectCount += 1u; + + return true; + } + else + return false; + }; + + uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(InvalidStyleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); + + ImageObjectInfo info = {}; + info.topLeft = topLeftPos; + info.dirU = float32_t2(size.x * cos(rotation), size.x * sin(rotation)); // + info.aspectRatio = size.y / size.x; + info.textureID = 0u; + if (!addImageObject_Internal(info, mainObjIdx)) + { + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + bool success = addImageObject_Internal(info, mainObjIdx); + assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object + } +} + bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) { bool success = true; - success &= finalizeMainObjectCopiesToGPU(intendedNextSubmit); - success &= finalizeGeometryCopiesToGPU(intendedNextSubmit); - success &= finalizeLineStyleCopiesToGPU(intendedNextSubmit); - success &= 
finalizeDTMSettingsCopiesToGPU(intendedNextSubmit); + success &= finalizeBufferCopies(intendedNextSubmit); success &= finalizeTextureCopies(intendedNextSubmit); return success; } @@ -461,100 +431,59 @@ void DrawResourcesFiller::popClipProjectionData() clipProjectionAddresses.pop_back(); } -bool DrawResourcesFiller::finalizeMainObjectCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit) { - bool success = true; - // Copy MainObjects - uint32_t remainingMainObjects = currentMainObjectCount - inMemMainObjectCount; - SBufferRange mainObjectsRange = { sizeof(MainObject) * inMemMainObjectCount, sizeof(MainObject) * remainingMainObjects, gpuDrawBuffers.mainObjectsBuffer }; - if (mainObjectsRange.size > 0u) - { - const MainObject* srcMainObjData = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()) + inMemMainObjectCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, mainObjectsRange, srcMainObjData)) - inMemMainObjectCount = currentMainObjectCount; - else - { - // TODO: Log - success = false; - } - } - return success; -} + size_t offset = 0ull; -bool DrawResourcesFiller::finalizeGeometryCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) -{ - bool success = true; - // Copy DrawObjects - uint32_t remainingDrawObjects = currentDrawObjectCount - inMemDrawObjectCount; - SBufferRange drawObjectsRange = { sizeof(DrawObject) * inMemDrawObjectCount, sizeof(DrawObject) * remainingDrawObjects, gpuDrawBuffers.drawObjectsBuffer }; - if (drawObjectsRange.size > 0u) - { - const DrawObject* srcDrawObjData = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + inMemDrawObjectCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, drawObjectsRange, srcDrawObjData)) - inMemDrawObjectCount = currentDrawObjectCount; - else - { - // TODO: Log - success = false; - } - } + assert(drawBuffers.calculateTotalConsumption() <= 
drawResourcesGPUBuffer->getSize()); - // Copy GeometryBuffer - uint64_t remainingGeometrySize = currentGeometryBufferSize - inMemGeometryBufferSize; - SBufferRange geomRange = { inMemGeometryBufferSize, remainingGeometrySize, gpuDrawBuffers.geometryBuffer }; - if (geomRange.size > 0u) - { - const uint8_t* srcGeomData = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + inMemGeometryBufferSize; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, geomRange, srcGeomData)) - inMemGeometryBufferSize = currentGeometryBufferSize; - else + auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool { - // TODO: Log - success = false; - } - } - return success; -} + // drawBuffer must be of type CPUFilledDrawBuffer + SBufferRange copyRange = { offset, drawBuffer.getStorageSize(), drawResourcesGPUBuffer}; -bool DrawResourcesFiller::finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) -{ - bool success = true; - // Copy LineStyles - uint32_t remainingLineStyles = currentLineStylesCount - inMemLineStylesCount; - SBufferRange stylesRange = { sizeof(LineStyle) * inMemLineStylesCount, sizeof(LineStyle) * remainingLineStyles, gpuDrawBuffers.lineStylesBuffer }; - if (stylesRange.size > 0u) - { - LineStyle* srcLineStylesData = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer()) + inMemLineStylesCount; + if (copyRange.offset + copyRange.size > drawResourcesGPUBuffer->getSize()) + { + // TODO: LOG ERROR, this shouldn't happen with correct auto-submission mechanism + assert(false); + return false; + } - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, stylesRange, srcLineStylesData)) - inMemLineStylesCount = currentLineStylesCount; - else + if (copyRange.size > 0ull) + { + drawBuffer.bufferOffset = copyRange.offset; + if (!m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) + return false; + offset += drawBuffer.getAlignedStorageSize(); + } + return 
true; + }; + + auto addComputeReservedFilledDrawBuffer = [&](auto& drawBuffer) -> bool { - // TODO: Log - success = false; - } - } - return success; -} + // drawBuffer must be of type ComputeReservedDrawBuffer + SBufferRange copyRange = { offset, drawBuffer.getStorageSize(), drawResourcesGPUBuffer}; -bool DrawResourcesFiller::finalizeDTMSettingsCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) -{ - bool success = true; - // Copy DTM settings - uint32_t remainingDTMSettings = currentDTMSettingsCount - inMemDTMSettingsCount; - SBufferRange dtmSettingsRange = { sizeof(DTMSettings) * inMemDTMSettingsCount, sizeof(DTMSettings) * remainingDTMSettings, gpuDrawBuffers.dtmSettingsBuffer }; - if (dtmSettingsRange.size > 0u) - { - const DTMSettings* srcDTMSettingsData = reinterpret_cast(cpuDrawBuffers.dtmSettingsBuffer->getPointer()) + inMemDTMSettingsCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, dtmSettingsRange, srcDTMSettingsData)) - inMemDTMSettingsCount = currentDTMSettingsCount; - else - { - // TODO: Log - success = false; - } - } - return success; + if (copyRange.offset + copyRange.size > drawResourcesGPUBuffer->getSize()) + { + // TODO: LOG ERROR, this shouldn't happen with correct auto-submission mechanism + assert(false); + return false; + } + + drawBuffer.bufferOffset = copyRange.offset; + offset += drawBuffer.getAlignedStorageSize(); + }; + + copyCPUFilledDrawBuffer(drawBuffers.lineStyles); + copyCPUFilledDrawBuffer(drawBuffers.dtmSettings); + copyCPUFilledDrawBuffer(drawBuffers.clipProjections); + copyCPUFilledDrawBuffer(drawBuffers.mainObjects); + copyCPUFilledDrawBuffer(drawBuffers.drawObjects); + copyCPUFilledDrawBuffer(drawBuffers.indexBuffer); + copyCPUFilledDrawBuffer(drawBuffers.geometryInfo); + + return true; } bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit) @@ -872,7 +801,7 @@ uint64_t DrawResourcesFiller::addClipProjectionData_Internal(const ClipProjectio uint8_t* dst = 
reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; memcpy(dst, &clipProjectionData, sizeof(ClipProjectionData)); - const uint64_t ret = currentGeometryBufferSize + geometryBufferAddress; + const uint64_t ret = currentGeometryBufferSize + drawResourcesBDA; currentGeometryBufferSize += sizeof(ClipProjectionData); return ret; } @@ -904,7 +833,7 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::POLYLINE_CONNECTOR) | 0 << 16); - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = drawResourcesBDA + currentGeometryBufferSize; for (uint32_t i = 0u; i < objectsToUpload; ++i) { void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; @@ -946,7 +875,7 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::LINE) | 0 << 16); - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = drawResourcesBDA + currentGeometryBufferSize; for (uint32_t i = 0u; i < objectsToUpload; ++i) { void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; @@ -987,7 +916,7 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, // Add DrawObjs DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = drawResourcesBDA + currentGeometryBufferSize; for (uint32_t i = 0u; i < objectsToUpload; ++i) { for (uint16_t subObject = 0; subObject < CagesPerQuadBezier; subObject++) @@ -1033,7 +962,7 @@ void DrawResourcesFiller::addHatch_Internal(const 
Hatch& hatch, uint32_t& curren static_assert(sizeof(CurveBox) == sizeof(Hatch::CurveHatchBox)); void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; memcpy(dst, &hatchBox, sizeof(CurveBox)); - hatchBoxAddress = geometryBufferAddress + currentGeometryBufferSize; + hatchBoxAddress = drawResourcesBDA + currentGeometryBufferSize; currentGeometryBufferSize += sizeof(CurveBox); } @@ -1062,7 +991,7 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint { void* geomDst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; memcpy(geomDst, &glyphInfo, sizeof(GlyphInfo)); - uint64_t fontGlyphAddr = geometryBufferAddress + currentGeometryBufferSize; + uint64_t fontGlyphAddr = drawResourcesBDA + currentGeometryBufferSize; currentGeometryBufferSize += sizeof(GlyphInfo); DrawObject drawObj = {}; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 98dffa90e..bc456f806 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -14,22 +14,10 @@ using namespace nbl::asset; using namespace nbl::ext::TextRendering; static_assert(sizeof(DrawObject) == 16u); -static_assert(sizeof(MainObject) == 16u); -static_assert(sizeof(Globals) == 128u); +static_assert(sizeof(MainObject) == 12u); static_assert(sizeof(LineStyle) == 88u); static_assert(sizeof(ClipProjectionData) == 88u); -template -struct DrawBuffers -{ - smart_refctd_ptr indexBuffer; // only is valid for IGPUBuffer because it's filled at allocation time and never touched again - smart_refctd_ptr mainObjectsBuffer; - smart_refctd_ptr drawObjectsBuffer; - smart_refctd_ptr geometryBuffer; - smart_refctd_ptr lineStylesBuffer; - smart_refctd_ptr dtmSettingsBuffer; -}; - // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. // ! Drawing new objects (polylines, hatches, etc.) should go through this function. 
@@ -39,9 +27,67 @@ struct DrawBuffers struct DrawResourcesFiller { public: + + /// @brief general parent struct for 1.ComputeReserved and 2.CPUFilled DrawBuffers + struct DrawBuffer + { + static constexpr size_t Alignment = 8u; + static constexpr size_t InvalidBufferOffset = ~0u; + size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued + virtual size_t getCount() const = 0; + virtual size_t getStorageSize() const = 0; + virtual size_t getAlignedStorageSize() const { core::alignUp(getStorageSize(), Alignment); } + }; + + /// @brief DrawBuffer reserved for compute shader stages input/output + template + struct ComputeReservedDrawBuffer : DrawBuffer + { + size_t count = 0ull; + size_t getCount() const override { return count; } + size_t getStorageSize() const override { return count * sizeof(T); } + }; + + /// @brief DrawBuffer which is filled by CPU, packed and sent to GPU + template + struct CPUFilledDrawBuffer : DrawBuffer + { + core::vector vector; + size_t getCount() const { return vector.size(); } + size_t getStorageSize() const { return vector.size() * sizeof(T); } + }; + + /// @brief struct to hold all draw buffers + struct DrawBuffers + { + // auto-submission level 0 buffers (settings that mainObj references) + CPUFilledDrawBuffer lineStyles; + CPUFilledDrawBuffer dtmSettings; + CPUFilledDrawBuffer clipProjections; + + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) + CPUFilledDrawBuffer mainObjects; - typedef uint32_t index_buffer_type; + // auto-submission level 2 buffers + CPUFilledDrawBuffer drawObjects; + CPUFilledDrawBuffer indexBuffer; + CPUFilledDrawBuffer geometryInfo; // general purpose byte buffer for custom geometries, etc + // Get Total memory consumption, If all DrawBuffers get packed together with DrawBuffer::Alignment + // Useful to know when to know when to overflow + size_t 
calculateTotalConsumption() const + { + return + lineStyles.getAlignedStorageSize() + + dtmSettings.getAlignedStorageSize() + + clipProjections.getAlignedStorageSize() + + mainObjects.getAlignedStorageSize() + + drawObjects.getAlignedStorageSize() + + indexBuffer.getAlignedStorageSize() + + geometryInfo.getAlignedStorageSize(); + } + }; + DrawResourcesFiller(); DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue); @@ -49,18 +95,8 @@ struct DrawResourcesFiller typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); - void allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t indices); - - void allocateMainObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t mainObjects); - - void allocateDrawObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t drawObjects); + void allocateDrawResourcesBuffer(ILogicalDevice* logicalDevice, size_t size); - void allocateGeometryBuffer(ILogicalDevice* logicalDevice, size_t size); - - void allocateStylesBuffer(ILogicalDevice* logicalDevice, uint32_t lineStylesCount); - - void allocateDTMSettingsBuffer(ILogicalDevice* logicalDevice, uint32_t dtmSettingsCount); - void allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); // functions that user should set to get MSDF texture if it's not available in cache. 
@@ -118,80 +154,10 @@ struct DrawResourcesFiller float64_t2 topLeftPos, float32_t2 size, float32_t rotation, - SIntendedSubmitInfo& intendedNextSubmit) - { - auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool - { - const uint32_t maxGeometryBufferImageObjects = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ImageObjectInfo)); - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferImageObjects); - - if (uploadableObjects >= 1u) - { - void* dstGeom = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dstGeom, &imageObjectInfo, sizeof(ImageObjectInfo)); - uint64_t geomBufferAddr = geometryBufferAddress + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(ImageObjectInfo); - - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function - drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = geomBufferAddr; - void* dstDrawObj = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dstDrawObj, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; - - return true; - } - else - return false; - }; - - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(InvalidStyleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); - - ImageObjectInfo info = {}; - info.topLeft = topLeftPos; - info.dirU = float32_t2(size.x * cos(rotation), size.x * sin(rotation)); // - info.aspectRatio = size.y / size.x; - info.textureID = 0u; - if (!addImageObject_Internal(info, mainObjIdx)) - { - // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw 
objects - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addImageObject_Internal(info, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object - } - } + SIntendedSubmitInfo& intendedNextSubmit); bool finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - inline uint32_t getLineStyleCount() const { return currentLineStylesCount; } - - inline uint32_t getDrawObjectCount() const { return currentDrawObjectCount; } - - inline uint32_t getMainObjectCount() const { return currentMainObjectCount; } - - inline size_t getCurrentMainObjectsBufferSize() const - { - return sizeof(MainObject) * currentMainObjectCount; - } - - inline size_t getCurrentDrawObjectsBufferSize() const - { - return sizeof(DrawObject) * currentDrawObjectCount; - } - - inline size_t getCurrentGeometryBufferSize() const - { - return currentGeometryBufferSize; - } - - inline size_t getCurrentLineStylesBufferSize() const - { - return sizeof(LineStyle) * currentLineStylesCount; - } - void reset() { resetGeometryCounters(); @@ -200,8 +166,8 @@ struct DrawResourcesFiller resetDTMSettingsCounters(); } - DrawBuffers cpuDrawBuffers; - DrawBuffers gpuDrawBuffers; + DrawBuffers drawBuffers; // will be compacted and copied into gpu draw resources + nbl::core::smart_refctd_ptr drawResourcesGPUBuffer; uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); @@ -242,16 +208,8 @@ struct DrawResourcesFiller SubmitFunc submitDraws; - bool finalizeMainObjectCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - - bool finalizeGeometryCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + bool finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit); - bool finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - - bool finalizeDTMSettingsCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - - bool 
finalizeCustomClipProjectionCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - bool finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit); // Internal Function to call whenever we overflow while filling our buffers with geometry (potential limiters: indexBuffer, drawObjectsBuffer or geometryBuffer) @@ -430,29 +388,7 @@ struct DrawResourcesFiller smart_refctd_ptr m_utilities; IQueue* m_copyQueue; - uint32_t maxIndexCount; - - uint32_t inMemMainObjectCount = 0u; - uint32_t currentMainObjectCount = 0u; - uint32_t maxMainObjects = 0u; - - uint32_t inMemDrawObjectCount = 0u; - uint32_t currentDrawObjectCount = 0u; - uint32_t maxDrawObjects = 0u; - - uint64_t inMemGeometryBufferSize = 0u; - uint64_t currentGeometryBufferSize = 0u; - uint64_t maxGeometryBufferSize = 0u; - - uint32_t inMemLineStylesCount = 0u; - uint32_t currentLineStylesCount = 0u; - uint32_t maxLineStyles = 0u; - - uint32_t inMemDTMSettingsCount = 0u; - uint32_t currentDTMSettingsCount = 0u; - uint32_t maxDtmSettings = 0u; - - uint64_t geometryBufferAddress = 0u; // Actual BDA offset 0 of the gpu buffer + uint64_t drawResourcesBDA = 0u; // Actual BDA offset 0 of the gpu buffer std::deque clipProjections; // stack of clip projectios stored so we can resubmit them if geometry buffer got reset. std::deque clipProjectionAddresses; // stack of clip projection gpu addresses in geometry buffer. 
to keep track of them in push/pops diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index eff8fd3e1..7dd60ca47 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -288,19 +288,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue()); - // TODO: move individual allocations to DrawResourcesFiller::allocateResources(memory) - // Issue warning error, if we can't store our largest geomm struct + clip proj data inside geometry buffer along linestyle and mainObject - uint32_t maxIndices = maxObjects * 6u * 2u; - drawResourcesFiller.allocateIndexBuffer(m_device.get(), maxIndices); - drawResourcesFiller.allocateMainObjectsBuffer(m_device.get(), maxObjects); - drawResourcesFiller.allocateDrawObjectsBuffer(m_device.get(), maxObjects * 5u); - drawResourcesFiller.allocateStylesBuffer(m_device.get(), 512u); - drawResourcesFiller.allocateDTMSettingsBuffer(m_device.get(), 512u); - - // * 3 because I just assume there is on average 3x beziers per actual object (cause we approximate other curves/arcs with beziers now) - // + 128 ClipProjData - size_t geometryBufferSize = maxObjects * sizeof(QuadraticBezierInfo) * 3 + 128 * sizeof(ClipProjectionData); - drawResourcesFiller.allocateGeometryBuffer(m_device.get(), geometryBufferSize); + size_t bufferSize = 512u * 1024u * 1024u; // 512 MB + drawResourcesFiller.allocateDrawResourcesBuffer(m_device.get(), bufferSize); drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); { @@ -314,14 +303,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto globalsBufferMem = m_device->allocate(memReq, m_globalsBuffer.get()); } - size_t sumBufferSizes = - drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.geometryBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.indexBuffer->getSize() + - 
drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer->getSize(); - m_logger->log("Buffers Size = %.2fKB", ILogger::E_LOG_LEVEL::ELL_INFO, sumBufferSizes / 1024.0f); - // pseudoStencil { asset::E_FORMAT pseudoStencilFormat = asset::EF_R32_UINT; @@ -778,7 +759,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { descriptorSet0 = descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout0)); descriptorSet1 = descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout1)); - constexpr uint32_t DescriptorCountSet0 = 7u; + constexpr uint32_t DescriptorCountSet0 = 3u; video::IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[DescriptorCountSet0] = {}; // Descriptors For Set 0: @@ -786,31 +767,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu descriptorInfosSet0[0u].info.buffer.size = m_globalsBuffer->getCreationParams().size; descriptorInfosSet0[0u].desc = m_globalsBuffer; - descriptorInfosSet0[1u].info.buffer.offset = 0u; - descriptorInfosSet0[1u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer->getCreationParams().size; - descriptorInfosSet0[1u].desc = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer; - - descriptorInfosSet0[2u].info.buffer.offset = 0u; - descriptorInfosSet0[2u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer->getCreationParams().size; - descriptorInfosSet0[2u].desc = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer; - - descriptorInfosSet0[3u].info.buffer.offset = 0u; - descriptorInfosSet0[3u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer->getCreationParams().size; - descriptorInfosSet0[3u].desc = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer; - - descriptorInfosSet0[4u].info.buffer.offset = 0u; - descriptorInfosSet0[4u].info.buffer.size = 
drawResourcesFiller.gpuDrawBuffers.dtmSettingsBuffer->getCreationParams().size; - descriptorInfosSet0[4u].desc = drawResourcesFiller.gpuDrawBuffers.dtmSettingsBuffer; - - descriptorInfosSet0[5u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfosSet0[5u].info.combinedImageSampler.sampler = msdfTextureSampler; - descriptorInfosSet0[5u].desc = drawResourcesFiller.getMSDFsTextureArray(); + descriptorInfosSet0[1u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfosSet0[1u].info.combinedImageSampler.sampler = msdfTextureSampler; + descriptorInfosSet0[1u].desc = drawResourcesFiller.getMSDFsTextureArray(); - descriptorInfosSet0[6u].desc = msdfTextureSampler; // TODO[Erfan]: different sampler and make immutable? + descriptorInfosSet0[2u].desc = msdfTextureSampler; // TODO[Erfan]: different sampler and make immutable? // This is bindless to we write to it later. - // descriptorInfosSet0[6u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - // descriptorInfosSet0[6u].desc = drawResourcesFiller.getMSDFsTextureArray(); + // descriptorInfosSet0[3u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + // descriptorInfosSet0[3u].desc = drawResourcesFiller.getMSDFsTextureArray(); // Descriptors For Set 1: constexpr uint32_t DescriptorCountSet1 = 2u; @@ -834,60 +799,32 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu descriptorUpdates[0u].count = 1u; descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; - // drawObjectsBuffer + // mdfs textures descriptorUpdates[1u].dstSet = descriptorSet0.get(); descriptorUpdates[1u].binding = 1u; descriptorUpdates[1u].arrayElement = 0u; descriptorUpdates[1u].count = 1u; descriptorUpdates[1u].info = &descriptorInfosSet0[1u]; - - // mainObjectsBuffer + + // general texture sampler descriptorUpdates[2u].dstSet = descriptorSet0.get(); descriptorUpdates[2u].binding = 2u; descriptorUpdates[2u].arrayElement = 0u; 
descriptorUpdates[2u].count = 1u; descriptorUpdates[2u].info = &descriptorInfosSet0[2u]; - // lineStylesBuffer - descriptorUpdates[3u].dstSet = descriptorSet0.get(); - descriptorUpdates[3u].binding = 3u; + // Set 1 Updates: + descriptorUpdates[3u].dstSet = descriptorSet1.get(); + descriptorUpdates[3u].binding = 0u; descriptorUpdates[3u].arrayElement = 0u; descriptorUpdates[3u].count = 1u; - descriptorUpdates[3u].info = &descriptorInfosSet0[3u]; - - // dtmSettingsBuffer - descriptorUpdates[4u].dstSet = descriptorSet0.get(); - descriptorUpdates[4u].binding = 4u; + descriptorUpdates[3u].info = &descriptorInfosSet1[0u]; + + descriptorUpdates[4u].dstSet = descriptorSet1.get(); + descriptorUpdates[4u].binding = 1u; descriptorUpdates[4u].arrayElement = 0u; descriptorUpdates[4u].count = 1u; - descriptorUpdates[4u].info = &descriptorInfosSet0[4u]; - - // mdfs textures - descriptorUpdates[5u].dstSet = descriptorSet0.get(); - descriptorUpdates[5u].binding = 5u; - descriptorUpdates[5u].arrayElement = 0u; - descriptorUpdates[5u].count = 1u; - descriptorUpdates[5u].info = &descriptorInfosSet0[5u]; - - // mdfs samplers - descriptorUpdates[6u].dstSet = descriptorSet0.get(); - descriptorUpdates[6u].binding = 6u; - descriptorUpdates[6u].arrayElement = 0u; - descriptorUpdates[6u].count = 1u; - descriptorUpdates[6u].info = &descriptorInfosSet0[6u]; - - // Set 1 Updates: - descriptorUpdates[7u].dstSet = descriptorSet1.get(); - descriptorUpdates[7u].binding = 0u; - descriptorUpdates[7u].arrayElement = 0u; - descriptorUpdates[7u].count = 1u; - descriptorUpdates[7u].info = &descriptorInfosSet1[0u]; - - descriptorUpdates[8u].dstSet = descriptorSet1.get(); - descriptorUpdates[8u].binding = 1u; - descriptorUpdates[8u].arrayElement = 0u; - descriptorUpdates[8u].count = 1u; - descriptorUpdates[8u].info = &descriptorInfosSet1[1u]; + descriptorUpdates[4u].info = &descriptorInfosSet1[1u]; m_device->updateDescriptorSets(DescriptorUpdatesCount, descriptorUpdates, 0u, nullptr); } @@ -2977,7 +2914,6 
@@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu default: m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!",ILogger::ELL_ERROR); } - // create matching size gpu image smart_refctd_ptr gpuImg; @@ -3015,7 +2951,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { { .dstSet = descriptorSet0.get(), - .binding = 6u, + .binding = 3u, .arrayElement = 0u, .count = 1u, .info = &dsInfo, diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 8412b29ad..f9c89d45c 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -53,24 +53,32 @@ static_assert(offsetof(ClipProjectionData, minClipNDC) == 72u); static_assert(offsetof(ClipProjectionData, maxClipNDC) == 80u); #endif -struct Globals +struct Pointers { - ClipProjectionData defaultClipProjection; // 88 - pfloat64_t screenToWorldRatio; // 96 - pfloat64_t worldToScreenRatio; // 100 - uint32_t2 resolution; // 108 - float antiAliasingFactor; // 112 - float miterLimit; // 116 - float32_t2 _padding; // 128 + uint64_t lineStyles; + uint64_t dtmSettings; + uint64_t customClipProjections; + uint64_t mainObjects; + uint64_t drawObjects; + uint64_t geometryBuffer; }; +#ifndef __HLSL_VERSION +static_assert(sizeof(Pointers) == 48u); +#endif +struct Globals +{ + Pointers pointers; + ClipProjectionData defaultClipProjection; + pfloat64_t screenToWorldRatio; + pfloat64_t worldToScreenRatio; + uint32_t2 resolution; + float antiAliasingFactor; + float miterLimit; + float32_t2 _padding; +}; #ifndef __HLSL_VERSION -static_assert(offsetof(Globals, defaultClipProjection) == 0u); -static_assert(offsetof(Globals, screenToWorldRatio) == 88u); -static_assert(offsetof(Globals, worldToScreenRatio) == 96u); -static_assert(offsetof(Globals, resolution) == 104u); -static_assert(offsetof(Globals, antiAliasingFactor) == 112u); -static_assert(offsetof(Globals, miterLimit) == 116u); 
+static_assert(sizeof(Globals) == 176u); #endif #ifdef __HLSL_VERSION @@ -127,7 +135,7 @@ struct MainObject { uint32_t styleIdx; uint32_t dtmSettingsIdx; - uint64_t clipProjectionAddress; + uint32_t clipProjectionIndex; }; struct DrawObject @@ -137,6 +145,8 @@ struct DrawObject uint64_t geometryAddress; }; + +// Goes into geometry buffer, needs to be aligned by 8 struct LinePointInfo { pfloat64_t2 p; @@ -144,6 +154,7 @@ struct LinePointInfo float32_t stretchValue; }; +// Goes into geometry buffer, needs to be aligned by 8 struct QuadraticBezierInfo { nbl::hlsl::shapes::QuadraticBezier shape; // 48bytes = 3 (control points) x 16 (float64_t2) @@ -154,6 +165,7 @@ struct QuadraticBezierInfo static_assert(offsetof(QuadraticBezierInfo, phaseShift) == 48u); #endif +// Goes into geometry buffer, needs to be aligned by 8 struct GlyphInfo { pfloat64_t2 topLeft; // 2 * 8 = 16 bytes @@ -198,6 +210,7 @@ struct GlyphInfo } }; +// Goes into geometry buffer, needs to be aligned by 8 struct ImageObjectInfo { pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) @@ -247,6 +260,7 @@ struct PolylineConnector }; // NOTE: Don't attempt to pack curveMin/Max to uints because of limited range of values, we need the logarithmic precision of floats (more precision near 0) +// Goes into geometry buffer, needs to be aligned by 8 struct CurveBox { // will get transformed in the vertex shader, and will be calculated on the cpu when generating these boxes @@ -362,6 +376,7 @@ struct DTMSettings return DISCRETE_FIXED_LENGTH_INTERVALS; } }; + #ifndef __HLSL_VERSION inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) { @@ -390,7 +405,6 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) return lhs.outlineLineStyleIdx == rhs.outlineLineStyleIdx && lhs.contourLineStyleIdx == rhs.contourLineStyleIdx; } - #endif NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture @@ -399,15 +413,44 @@ NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = 
(1u << MainObjectIdxBits) - 1u; NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; -NBL_CONSTEXPR uint64_t InvalidClipProjectionAddress = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidClipProjectionIndex = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; + +// Hatches NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; -// TODO: get automatic version working on HLSL NBL_CONSTEXPR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); + +// Text or MSDF Hatches NBL_CONSTEXPR float MSDFPixelRange = 4.0f; NBL_CONSTEXPR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; NBL_CONSTEXPR float MSDFSize = 32.0f; NBL_CONSTEXPR uint32_t MSDFMips = 4; NBL_CONSTEXPR float HatchFillMSDFSceenSpaceSize = 8.0; +#ifdef __HLSL_VERSION +[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); + +LineStyle loadLineStyle(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.lineStyles + index * sizeof(LineStyle), 8u); +} +DTMSettings loadDTMSettings(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.dtmSettings + index * sizeof(DTMSetting), 8u); +} +ClipProjectionData loadCustomClipProjection(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.customClipProjections + index * sizeof(ClipProjectionData), 8u); +} +MainObject loadMainObject(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.mainObjs + index * sizeof(MainObject), 8u); +} +DrawObject loadDrawObject(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.drawObjs + index * sizeof(DrawObject), 8u); +} +#endif + + #endif diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 261e336f3..0cf4e3bce 
100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -233,17 +233,14 @@ struct PSInput }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); -[[vk::binding(1, 0)]] StructuredBuffer drawObjects : register(t0); -[[vk::binding(2, 0)]] StructuredBuffer mainObjects : register(t1); -[[vk::binding(3, 0)]] StructuredBuffer lineStyles : register(t2); -[[vk::binding(4, 0)]] StructuredBuffer dtmSettings : register(t3); -[[vk::combinedImageSampler]][[vk::binding(5, 0)]] Texture2DArray msdfTextures : register(t4); -[[vk::combinedImageSampler]][[vk::binding(5, 0)]] SamplerState msdfSampler : register(s4); +// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl -[[vk::binding(6, 0)]] SamplerState textureSampler : register(s5); -[[vk::binding(7, 0)]] Texture2D textures[128] : register(t5); +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] Texture2DArray msdfTextures : register(t4); +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] SamplerState msdfSampler : register(s4); + +[[vk::binding(2, 0)]] SamplerState textureSampler : register(s5); +[[vk::binding(3, 0)]] Texture2D textures[128] : register(t5); // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] globallycoherent RWTexture2D pseudoStencil : register(u0); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index cddac89ba..4852d0522 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -357,10 +357,10 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, co template<> float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, 
float3 localTextureColor, bool colorFromTexture) { - uint32_t styleIdx = mainObjects[currentMainObjectIdx].styleIdx; + uint32_t styleIdx = loadMainObject(currentMainObjectIdx).styleIdx; if (!colorFromTexture) { - float32_t4 col = lineStyles[styleIdx].color; + float32_t4 col = loadLineStyle(styleIdx).color; col.w *= localAlpha; return float4(col); } @@ -387,7 +387,7 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store if (resolve) { - toResolveStyleIdx = mainObjects[storedMainObjectIdx].styleIdx; + toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx; if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); } @@ -409,7 +409,7 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp // draw with previous geometry's style's color or stored in texture buffer :kek: // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style - color = lineStyles[toResolveStyleIdx].color; + color = loadLineStyle(toResolveStyleIdx).color; color.a *= float(storedQuantizedAlpha) / 255.f; return color; @@ -424,7 +424,7 @@ float4 fragMain(PSInput input) : SV_TARGET ObjectType objType = input.getObjType(); const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); - const MainObject mainObj = mainObjects[currentMainObjectIdx]; + const MainObject mainObj = loadMainObject(currentMainObjectIdx); #ifdef DTM // TRIANGLE RENDERING @@ -435,9 +435,9 @@ float4 fragMain(PSInput input) : SV_TARGET const float stretch = 1.0f; // 
TODO: figure out what is it for const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - DTMSettings dtm = dtmSettings[mainObj.dtmSettingsIdx]; - LineStyle outlineStyle = lineStyles[dtm.outlineLineStyleIdx]; - LineStyle contourStyle = lineStyles[dtm.contourLineStyleIdx]; + DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); + LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); + LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); float3 v[3]; v[0] = input.getScreenSpaceVertexAttribs(0); @@ -690,7 +690,7 @@ float4 fragMain(PSInput input) : SV_TARGET nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); - LineStyle style = lineStyles[styleIdx]; + LineStyle style = loadLineStyle(styleIdx); if (!style.hasStipples() || stretch == InvalidStyleStretchValue) { @@ -699,7 +699,7 @@ float4 fragMain(PSInput input) : SV_TARGET else { nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); } } @@ -714,14 +714,14 @@ float4 fragMain(PSInput input) : SV_TARGET const float stretch = input.getPatternStretch(); const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - LineStyle style = lineStyles[styleIdx]; + LineStyle style = loadLineStyle(styleIdx); if (!style.hasStipples() || stretch == InvalidStyleStretchValue) { distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); } else { - BezierStyleClipper clipper = 
BezierStyleClipper::construct(lineStyles[styleIdx], quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); } } @@ -847,7 +847,7 @@ float4 fragMain(PSInput input) : SV_TARGET localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist); } - LineStyle style = lineStyles[mainObj.styleIdx]; + LineStyle style = loadLineStyle(mainObj.styleIdx); uint32_t textureId = asuint(style.screenSpaceLineWidth); if (textureId != InvalidTextureIdx) { @@ -883,7 +883,7 @@ float4 fragMain(PSInput input) : SV_TARGET */ msdf *= exp2(max(mipLevel,0.0)); - LineStyle style = lineStyles[mainObj.styleIdx]; + LineStyle style = loadLineStyle(mainObj.styleIdx); const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf); diff --git a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl index 46c5d28e0..c75c86825 100644 --- a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl +++ b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl @@ -32,7 +32,7 @@ float32_t4 calculateFinalColor(const uint2 fragCoord) // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store if (resolve) { - toResolveStyleIdx = mainObjects[storedMainObjectIdx].styleIdx; + toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx; if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to 
resolve is invalid, then it means we should resolve from color color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); } @@ -45,7 +45,7 @@ float32_t4 calculateFinalColor(const uint2 fragCoord) // draw with previous geometry's style's color or stored in texture buffer :kek: // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style - color = lineStyles[toResolveStyleIdx].color; + color = loadLineStyle(toResolveStyleIdx).color; color.a *= float(storedQuantizedAlpha) / 255.f; return color; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index f7af0d8a6..5abe693ec 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -27,19 +27,10 @@ float2 QuadraticBezier(float2 p0, float2 p1, float2 p2, float t) ClipProjectionData getClipProjectionData(in MainObject mainObj) { - if (mainObj.clipProjectionAddress != InvalidClipProjectionAddress) - { - ClipProjectionData ret; - ret.projectionToNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress, 8u); - ret.minClipNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress + sizeof(pfloat64_t3x3), 8u); - ret.maxClipNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress + sizeof(pfloat64_t3x3) + sizeof(float32_t2), 8u); - - return ret; - } + if (mainObj.clipProjectionIndex != InvalidClipProjectionIndex) + return loadCustomClipProjection(mainObj.clipProjectionIndex); else - { return globals.defaultClipProjection; - } } float2 transformPointScreenSpace(pfloat64_t3x3 transformation, uint32_t2 resolution, pfloat64_t2 point2d) @@ -112,7 +103,7 @@ PSInput main(uint vertexID : SV_VertexID) vtxPos.x = _static_cast(vtx.pos.x); vtxPos.y = _static_cast(vtx.pos.y); - MainObject mainObj = 
mainObjects[pc.triangleMeshMainObjectIndex]; + MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); float2 transformedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos); @@ -129,8 +120,8 @@ PSInput main(uint vertexID : SV_VertexID) // TODO: line style of contour line has to be set too! DTMSettings dtm = dtmSettings[mainObj.dtmSettingsIdx]; - LineStyle outlineStyle = lineStyles[dtm.outlineLineStyleIdx]; - LineStyle contourStyle = lineStyles[dtm.contourLineStyleIdx]; + LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); + LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); @@ -145,7 +136,7 @@ PSInput main(uint vertexID : SV_VertexID) const uint vertexIdx = vertexID & 0x3u; const uint objectID = vertexID >> 2; - DrawObject drawObj = drawObjects[objectID]; + DrawObject drawObj = loadDrawObject(objectID); ObjectType objType = (ObjectType)(drawObj.type_subsectionIdx & 0x0000FFFF); uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; @@ -161,13 +152,13 @@ PSInput main(uint vertexID : SV_VertexID) outV.setObjType(objType); outV.setMainObjectIdx(drawObj.mainObjIndex); - MainObject mainObj = mainObjects[drawObj.mainObjIndex]; + MainObject mainObj = loadMainObject(drawObj.mainObjIndex); ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); // We only need these for Outline type objects like lines and bezier curves if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == 
ObjectType::POLYLINE_CONNECTOR) { - LineStyle lineStyle = lineStyles[mainObj.styleIdx]; + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); // Width is on both sides, thickness is one one side of the curve (div by 2.0f) const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + _static_cast(_static_cast(lineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); @@ -545,7 +536,7 @@ PSInput main(uint vertexID : SV_VertexID) } else if (objType == ObjectType::FONT_GLYPH) { - LineStyle lineStyle = lineStyles[mainObj.styleIdx]; + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); const float italicTiltSlope = lineStyle.screenSpaceLineWidth; // aliased text style member with line style GlyphInfo glyphInfo; From cf63282ffadcfdf083e3813b86196e4abb85c4e8 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 31 Mar 2025 14:39:55 +0330 Subject: [PATCH 016/129] [WIP] save work --- 62_CAD/DrawResourcesFiller.cpp | 88 ++++++++++++++++---------------- 62_CAD/DrawResourcesFiller.h | 56 ++++++++++++--------- 62_CAD/main.cpp | 91 ++++++++++------------------------ 3 files changed, 104 insertions(+), 131 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 8c1a42719..710df3cb9 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -45,20 +45,16 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) void DrawResourcesFiller::allocateDrawResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { - maxGeometryBufferSize = size; - + size = core::alignUp(size, BDALoadAlignment); IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; - geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; // INDEX_BUFFER USAGE for DTMs - gpuDrawBuffers.geometryBuffer = 
logicalDevice->createBuffer(std::move(geometryCreationParams)); - gpuDrawBuffers.geometryBuffer->setObjectDebugName("geometryBuffer"); + geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; + resourcesGPUBuffer = logicalDevice->createBuffer(std::move(geometryCreationParams)); + resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.geometryBuffer->getMemoryReqs(); + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto geometryBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.geometryBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - drawResourcesBDA = gpuDrawBuffers.geometryBuffer->getDeviceAddress(); - - cpuDrawBuffers.geometryBuffer = ICPUBuffer::create({ size }); + auto mem = logicalDevice->allocate(memReq, resourcesGPUBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); } void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) @@ -170,31 +166,37 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM // concatenate the index and vertex buffer into the geometry buffer const size_t indexBuffByteSize = mesh.getIndexBuffByteSize(); const size_t vtxBuffByteSize = mesh.getVertexBuffByteSize(); - const size_t geometryBufferDataToAddByteSize = indexBuffByteSize + vtxBuffByteSize; + const size_t dataToAddByteSize = vtxBuffByteSize + indexBuffByteSize; // copy into gemoetry cpu buffer insteaed + const size_t totalResourcesConsumption = resourcesCollection.calculateTotalConsumption(); + // TODO: rename, its not just points - const uint32_t remainingGeometryBufferSize = static_cast(maxGeometryBufferSize - 
currentGeometryBufferSize); + const uint32_t remainingResourcesSize = static_cast(resourcesGPUBuffer->getSize() - totalResourcesConsumption); - // TODO: assert of geometry buffer size, do i need to check if size of objects to be added <= remainingGeometryBufferSize? + // TODO: assert of geometry buffer size, do i need to check if size of objects to be added <= remainingResourcesSize? // TODO: auto submit instead of assert - assert(geometryBufferDataToAddByteSize <= remainingGeometryBufferSize); + assert(dataToAddByteSize <= remainingResourcesSize); - // TODO: vertices need to be aligned to 8? - uint64_t vtxBufferAddress; { - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - void* dst1 = dst; + // NOTE[ERFAN]: these push contants will be removed, everything will be accessed by dtmSettings, including where the vertex buffer data resides + auto& geometryBytesVector = resourcesCollection.geometryInfo.vector; + size_t geometryBufferOffset = core::alignUp(geometryBytesVector.size(), BDALoadAlignment); + geometryBytesVector.resize(geometryBufferOffset + dataToAddByteSize); + + // Copy VertexBuffer + void* dst = geometryBytesVector.data() + geometryBufferOffset; + // the actual bda address will be determined only after all copies are finalized, later we will do += `baseBDAAddress + geometryInfo.bufferOffset` + drawData.pushConstants.triangleMeshVerticesBaseAddress = geometryBufferOffset; + memcpy(dst, mesh.getVertices().data(), vtxBuffByteSize); + geometryBufferOffset += vtxBuffByteSize; - drawData.indexBufferOffset = currentGeometryBufferSize; + // Copy IndexBuffer + dst = geometryBytesVector.data() + geometryBufferOffset; + drawData.indexBufferOffset = geometryBufferOffset; memcpy(dst, mesh.getIndices().data(), indexBuffByteSize); - currentGeometryBufferSize += indexBuffByteSize; - - dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - 
drawData.pushConstants.triangleMeshVerticesBaseAddress = drawResourcesBDA + currentGeometryBufferSize; - memcpy(dst, mesh.getVertices().data(), vtxBuffByteSize); - currentGeometryBufferSize += vtxBuffByteSize; + geometryBufferOffset += indexBuffByteSize; } drawData.indexCount = mesh.getIndexCount(); @@ -433,16 +435,16 @@ void DrawResourcesFiller::popClipProjectionData() bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit) { - size_t offset = 0ull; + copiedResourcesSize = 0ull; - assert(drawBuffers.calculateTotalConsumption() <= drawResourcesGPUBuffer->getSize()); + assert(resourcesCollection.calculateTotalConsumption() <= resourcesGPUBuffer->getSize()); auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool { - // drawBuffer must be of type CPUFilledDrawBuffer - SBufferRange copyRange = { offset, drawBuffer.getStorageSize(), drawResourcesGPUBuffer}; + // drawBuffer must be of type CPUGeneratedResource + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; - if (copyRange.offset + copyRange.size > drawResourcesGPUBuffer->getSize()) + if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { // TODO: LOG ERROR, this shouldn't happen with correct auto-submission mechanism assert(false); @@ -454,17 +456,17 @@ bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNext drawBuffer.bufferOffset = copyRange.offset; if (!m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) return false; - offset += drawBuffer.getAlignedStorageSize(); + copiedResourcesSize += drawBuffer.getAlignedStorageSize(); } return true; }; auto addComputeReservedFilledDrawBuffer = [&](auto& drawBuffer) -> bool { - // drawBuffer must be of type ComputeReservedDrawBuffer - SBufferRange copyRange = { offset, drawBuffer.getStorageSize(), drawResourcesGPUBuffer}; + // drawBuffer must be of type ReservedComputeResource + SBufferRange 
copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; - if (copyRange.offset + copyRange.size > drawResourcesGPUBuffer->getSize()) + if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { // TODO: LOG ERROR, this shouldn't happen with correct auto-submission mechanism assert(false); @@ -472,17 +474,17 @@ bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNext } drawBuffer.bufferOffset = copyRange.offset; - offset += drawBuffer.getAlignedStorageSize(); + copiedResourcesSize += drawBuffer.getAlignedStorageSize(); }; - copyCPUFilledDrawBuffer(drawBuffers.lineStyles); - copyCPUFilledDrawBuffer(drawBuffers.dtmSettings); - copyCPUFilledDrawBuffer(drawBuffers.clipProjections); - copyCPUFilledDrawBuffer(drawBuffers.mainObjects); - copyCPUFilledDrawBuffer(drawBuffers.drawObjects); - copyCPUFilledDrawBuffer(drawBuffers.indexBuffer); - copyCPUFilledDrawBuffer(drawBuffers.geometryInfo); - + copyCPUFilledDrawBuffer(resourcesCollection.lineStyles); + copyCPUFilledDrawBuffer(resourcesCollection.dtmSettings); + copyCPUFilledDrawBuffer(resourcesCollection.clipProjections); + copyCPUFilledDrawBuffer(resourcesCollection.mainObjects); + copyCPUFilledDrawBuffer(resourcesCollection.drawObjects); + copyCPUFilledDrawBuffer(resourcesCollection.indexBuffer); + copyCPUFilledDrawBuffer(resourcesCollection.geometryInfo); + return true; } diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index bc456f806..47c4ba146 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -28,53 +28,54 @@ struct DrawResourcesFiller { public: - /// @brief general parent struct for 1.ComputeReserved and 2.CPUFilled DrawBuffers - struct DrawBuffer + static constexpr size_t BDALoadAlignment = 8u; + + /// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources + struct ResourceBase { - static constexpr size_t Alignment = 8u; static constexpr size_t InvalidBufferOffset = ~0u; 
size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued virtual size_t getCount() const = 0; virtual size_t getStorageSize() const = 0; - virtual size_t getAlignedStorageSize() const { core::alignUp(getStorageSize(), Alignment); } + virtual size_t getAlignedStorageSize() const { core::alignUp(getStorageSize(), BDALoadAlignment); } }; - /// @brief DrawBuffer reserved for compute shader stages input/output + /// @brief ResourceBase reserved for compute shader stages input/output template - struct ComputeReservedDrawBuffer : DrawBuffer + struct ReservedComputeResource : ResourceBase { size_t count = 0ull; size_t getCount() const override { return count; } size_t getStorageSize() const override { return count * sizeof(T); } }; - /// @brief DrawBuffer which is filled by CPU, packed and sent to GPU + /// @brief ResourceBase which is filled by CPU, packed and sent to GPU template - struct CPUFilledDrawBuffer : DrawBuffer + struct CPUGeneratedResource : ResourceBase { core::vector vector; size_t getCount() const { return vector.size(); } size_t getStorageSize() const { return vector.size() * sizeof(T); } }; - /// @brief struct to hold all draw buffers - struct DrawBuffers + /// @brief struct to hold all resources + struct ResourcesCollection { // auto-submission level 0 buffers (settings that mainObj references) - CPUFilledDrawBuffer lineStyles; - CPUFilledDrawBuffer dtmSettings; - CPUFilledDrawBuffer clipProjections; + CPUGeneratedResource lineStyles; + CPUGeneratedResource dtmSettings; + CPUGeneratedResource clipProjections; // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) - CPUFilledDrawBuffer mainObjects; + CPUGeneratedResource mainObjects; // auto-submission level 2 buffers - CPUFilledDrawBuffer drawObjects; - CPUFilledDrawBuffer indexBuffer; - CPUFilledDrawBuffer geometryInfo; // general purpose byte buffer for custom 
geometries, etc + CPUGeneratedResource drawObjects; + CPUGeneratedResource indexBuffer; + CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom geometries, etc - // Get Total memory consumption, If all DrawBuffers get packed together with DrawBuffer::Alignment - // Useful to know when to know when to overflow + // Get Total memory consumption, If all ResourcesCollection get packed together with BDALoadAlignment + // used to decide when to overflow size_t calculateTotalConsumption() const { return @@ -166,8 +167,14 @@ struct DrawResourcesFiller resetDTMSettingsCounters(); } - DrawBuffers drawBuffers; // will be compacted and copied into gpu draw resources - nbl::core::smart_refctd_ptr drawResourcesGPUBuffer; + /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders + const ResourcesCollection& getResourcesCollection() const { return &resourcesCollection; } + + /// @brief buffer containing all non-texture type resources + nbl::core::smart_refctd_ptr getResourcesGPUBuffer() const { return resourcesGPUBuffer; } + + /// @return how far resourcesGPUBuffer was copied to by `finalizeAllCopiesToGPU` in `resourcesCollection` + const size_t getCopiedResourcesSize() { return copiedResourcesSize; } uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); @@ -384,12 +391,15 @@ struct DrawResourcesFiller // If you haven't created a mainObject yet, then pass InvalidMainObjectIdx uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + // ResourcesCollection and packed into GPUBuffer + ResourcesCollection resourcesCollection; + nbl::core::smart_refctd_ptr resourcesGPUBuffer; + size_t copiedResourcesSize; + // Members smart_refctd_ptr m_utilities; IQueue* m_copyQueue; - uint64_t drawResourcesBDA 
= 0u; // Actual BDA offset 0 of the gpu buffer - std::deque clipProjections; // stack of clip projectios stored so we can resubmit them if geometry buffer got reset. std::deque clipProjectionAddresses; // stack of clip projection gpu addresses in geometry buffer. to keep track of them in push/pops diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 7dd60ca47..020b7cf6c 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1188,6 +1188,16 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu projectionToNDC = m_Camera.constructViewProjection(); Globals globalData = {}; + uint64_t baseAddress = drawResourcesFiller.getResourcesGPUBuffer()->getDeviceAddress(); + const auto& resources = drawResourcesFiller.getResourcesCollection(); + globalData.pointers = { + .lineStyles = baseAddress + resources.lineStyles.bufferOffset, + .dtmSettings = baseAddress + resources.dtmSettings.bufferOffset, + .customClipProjections = baseAddress + resources.customClipProjections.bufferOffset, + .mainObjects = baseAddress + resources.mainObjects.bufferOffset, + .drawObjects = baseAddress + resources.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resources.geometryBuffer.bufferOffset, + }; globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; globalData.defaultClipProjection.projectionToNDC = projectionToNDC; @@ -1254,25 +1264,12 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // pipelineBarriersBeforeDraw { - constexpr uint32_t MaxBufferBarriersCount = 6u; + constexpr uint32_t MaxBufferBarriersCount = 2u; uint32_t bufferBarriersCount = 0u; IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t bufferBarriers[MaxBufferBarriersCount]; + + const auto& resources = drawResourcesFiller.getResourcesCollection(); - // Index Buffer Copy Barrier -> Only do once at the beginning of the frames - if 
(m_realFrameIx == 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::INDEX_READ_BIT; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.gpuDrawBuffers.indexBuffer->getSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer, - }; - } if (m_globalsBuffer->getSize() > 0u) { auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; @@ -1287,35 +1284,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu .buffer = m_globalsBuffer, }; } - if (drawResourcesFiller.getCurrentDrawObjectsBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentDrawObjectsBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer, - }; - } - if (drawResourcesFiller.getCurrentGeometryBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentGeometryBufferSize(), - 
.buffer = drawResourcesFiller.gpuDrawBuffers.geometryBuffer, - }; - } - if (drawResourcesFiller.getCurrentMainObjectsBufferSize() > 0u) + if (drawResourcesFiller.getCopiedResourcesSize() > 0u) { auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; @@ -1325,22 +1294,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu bufferBarrier.range = { .offset = 0u, - .size = drawResourcesFiller.getCurrentMainObjectsBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer, - }; - } - if (drawResourcesFiller.getCurrentLineStylesBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentLineStylesBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer, + .size = drawResourcesFiller.getCopiedResourcesSize(), + .buffer = drawResourcesFiller.getResourcesGPUBuffer(), }; } cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .bufBarriers = {bufferBarriers, bufferBarriersCount}, .imgBarriers = {} }); @@ -1365,22 +1320,27 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }; } cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + const auto& resources = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); + + const uint32_t currentIndexCount = resources.drawObjects.getCount() * 6u; - const uint32_t currentIndexCount = 
drawResourcesFiller.getDrawObjectCount() * 6u; IGPUDescriptorSet* descriptorSets[] = { descriptorSet0.get(), descriptorSet1.get() }; cb->bindDescriptorSets(asset::EPBP_GRAPHICS, pipelineLayout.get(), 0u, 2u, descriptorSets); + if (mode == ExampleMode::CASE_9) { // TODO[Przemek]: based on our call bind index buffer you uploaded to part of the `drawResourcesFiller.gpuDrawBuffers.geometryBuffer` // Vertices will be pulled based on baseBDAPointer of where you uploaded the vertex + the VertexID in the vertex shader. - cb->bindIndexBuffer({ .offset = m_triangleMeshDrawData.indexBufferOffset, .buffer = drawResourcesFiller.gpuDrawBuffers.geometryBuffer.get() }, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = resources.geometryInfo.bufferOffset + m_triangleMeshDrawData.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); // TODO[Przemek]: binding the same pipelie, no need to change. cb->bindGraphicsPipeline(graphicsPipeline.get()); // TODO[Przemek]: contour settings, height shading settings, base bda pointers will need to be pushed via pushConstants before the draw currently as it's the easiest thing to do. 
- + m_triangleMeshDrawData.pushConstants.triangleMeshVerticesBaseAddress += resourcesGPUBuffer->getDeviceAddress() + resources.geometryInfo.bufferOffset; cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(PushConstants), &m_triangleMeshDrawData.pushConstants); // TODO[Przemek]: draw parameters needs to reflect the mesh involved @@ -1388,7 +1348,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else { - cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer.get() }, asset::EIT_32BIT); + assert(currentIndexCount == resources.indexBuffer.getCount()); + cb->bindIndexBuffer({ .offset = resources.indexBuffer.bufferOffset, .buffer = resourcesGPUBuffer.get() }, asset::EIT_32BIT); cb->bindGraphicsPipeline(graphicsPipeline.get()); cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); } From d776d24fbdc7e497ee1329ab9d95598890cc1357 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 31 Mar 2025 14:53:40 +0330 Subject: [PATCH 017/129] [WIP] add getMinimumRequiredResourcesBufferSize function, still not compiling --- 62_CAD/DrawResourcesFiller.cpp | 11 +++++++---- 62_CAD/DrawResourcesFiller.h | 10 +++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 710df3cb9..d9280cb76 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -42,10 +42,11 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) // gpuDrawBuffers.indexBuffer->setObjectDebugName("indexBuffer"); //} - -void DrawResourcesFiller::allocateDrawResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) +void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { + size = core::alignUp(size, BDALoadAlignment); + size = core::max(size, getMinimumRequiredResourcesBufferSize()); IGPUBuffer::SCreationParams geometryCreationParams 
= {}; geometryCreationParams.size = size; geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; @@ -221,8 +222,8 @@ void DrawResourcesFiller::drawHatch( { // TODO[Optimization Idea]: don't draw hatch twice if both colors are visible: instead do the msdf inside the alpha resolve by detecting mainObj being a hatch // https://discord.com/channels/593902898015109131/856835291712716820/1228337893366300743 - // TODO: Come back to this idea when doing color resolve for ecws (they don't have mainObj/style Index, instead they have uv into a texture - + // TODO: Come back to this idea when doing color resolve for ecws (they don't have mainObj/style Index, instead they have uv into a texture + // if backgroundColor is visible drawHatch(hatch, backgroundColor, intendedNextSubmit); // if foregroundColor is visible @@ -268,6 +269,7 @@ void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); } +// TODO: FIX void DrawResourcesFiller::drawFontGlyph( nbl::ext::TextRendering::FontFace* fontFace, uint32_t glyphIdx, @@ -302,6 +304,7 @@ void DrawResourcesFiller::drawFontGlyph( } } +// TODO: FIX void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) { auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 47c4ba146..a8010a7ec 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -95,8 +95,16 @@ struct DrawResourcesFiller typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); + + /// @brief Get minimum required size for resources buffer (containing objects and geometry info and their settings) + consteval size_t 
getMinimumRequiredResourcesBufferSize() const + { + // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer + // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) + return core::alignUp(sizeof(LineStyle) * 2u + sizeof(DTMSettings) + sizeof(ClipProjectionData) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, BDALoadAlignment); + } - void allocateDrawResourcesBuffer(ILogicalDevice* logicalDevice, size_t size); + void allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size); void allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); From 4dc28d57be6d72cbe779f36bf12599769a7c7d7e Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 01:21:07 +0330 Subject: [PATCH 018/129] [WIP] more drawResource auto-submission logic fixes --- 62_CAD/DrawResourcesFiller.cpp | 229 ++++++++++++++------------------- 62_CAD/DrawResourcesFiller.h | 76 +++++------ 62_CAD/main.cpp | 2 +- 3 files changed, 129 insertions(+), 178 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index d9280cb76..1eff63552 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -44,8 +44,7 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { - - size = core::alignUp(size, BDALoadAlignment); + size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; @@ -169,12 +168,7 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM const size_t vtxBuffByteSize = mesh.getVertexBuffByteSize(); 
const size_t dataToAddByteSize = vtxBuffByteSize + indexBuffByteSize; - // copy into gemoetry cpu buffer insteaed - - const size_t totalResourcesConsumption = resourcesCollection.calculateTotalConsumption(); - - // TODO: rename, its not just points - const uint32_t remainingResourcesSize = static_cast(resourcesGPUBuffer->getSize() - totalResourcesConsumption); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); // TODO: assert of geometry buffer size, do i need to check if size of objects to be added <= remainingResourcesSize? // TODO: auto submit instead of assert @@ -183,8 +177,8 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM { // NOTE[ERFAN]: these push contants will be removed, everything will be accessed by dtmSettings, including where the vertex buffer data resides auto& geometryBytesVector = resourcesCollection.geometryInfo.vector; - size_t geometryBufferOffset = core::alignUp(geometryBytesVector.size(), BDALoadAlignment); - geometryBytesVector.resize(geometryBufferOffset + dataToAddByteSize); + size_t geometryBufferOffset = core::alignUp(geometryBytesVector.size(), ResourcesMaxNaturalAlignment); + geometryBytesVector.resize(geometryBufferOffset + dataToAddByteSize); // this will increase total resources consumption and reduce remainingResourcesSize --> no need to update any size trackers // Copy VertexBuffer void* dst = geometryBytesVector.data() + geometryBufferOffset; @@ -207,9 +201,6 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM uint32_t dtmSettingsIndex = addDTMSettings_SubmitIfNeeded(dtmSettingsInfo, intendedNextSubmit); drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(InvalidStyleIdx, dtmSettingsIndex, intendedNextSubmit); - - // TODO: use this function later for auto submit - //submitCurrentDrawObjectsAndReset(intendedNextSubmit, 0); } // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + 
patternColor @@ -361,32 +352,46 @@ bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNe uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit) { + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const bool enoughMem = remainingResourcesSize >= sizeof(LineStyle); // enough remaining memory for 1 more linestyle? + uint32_t outLineStyleIdx = addLineStyle_Internal(lineStyle); if (outLineStyleIdx == InvalidStyleIdx) { + // There wasn't enough resource memory remaining to fit a single LineStyle finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - resetGeometryCounters(); - resetMainObjectCounters(); - resetLineStyleCounters(); - resetDTMSettingsCounters(); + + // resets itself + resetLineStyles(); + // resets higher level resources + resetMainObjects(); + resetDrawObjects(); + outLineStyleIdx = addLineStyle_Internal(lineStyle); assert(outLineStyleIdx != InvalidStyleIdx); } + return outLineStyleIdx; } uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit) { + // before calling `addDTMSettings_Internal` we have made sute we have enough mem for uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); - if (outDTMSettingIdx == InvalidStyleIdx) + if (outDTMSettingIdx == InvalidDTMSettingsIdx) { + // There wasn't enough resource memory remaining to fit dtmsettings struct + 2 linestyles structs. 
finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - resetGeometryCounters(); - resetMainObjectCounters(); - resetLineStyleCounters(); - resetDTMSettingsCounters(); + + // resets itself + resetDTMSettings(); + resetLineStyles(); // additionally resets linestyles as well, just to be safe + // resets higher level resources + resetMainObjects(); + resetDrawObjects(); + outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); assert(outDTMSettingIdx != InvalidDTMSettingsIdx); } @@ -402,18 +407,17 @@ uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, ui uint32_t outMainObjectIdx = addMainObject_Internal(mainObject); if (outMainObjectIdx == InvalidMainObjectIdx) { + // failed to fit into remaining resources mem or exceeded max indexable mainobj finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - - // geometries needs to be reset because they reference draw objects and draw objects reference main objects that are now unavailable and reset - resetGeometryCounters(); - // mainObjects needs to be reset because we submitted every previous main object - resetMainObjectCounters(); - // we shouldn't reset linestyles and clip projections here because it was possibly requested to push to mem before addMainObjects - // but clip projections are reset due to geometry/bda buffer being reset so we need to push again - // acquireCurrentClipProjectionAddress again here because clip projection should exist in the geometry buffer, and reseting geometry counters will invalidate the current clip proj and requires repush - mainObject.clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); + // resets itself + resetMainObjects(); + // resets higher level resources + resetDrawObjects(); + // we shouldn't reset lower level resources like linestyles and clip projections here because it was possibly requested to push to mem before addMainObjects + + // try to add again 
outMainObjectIdx = addMainObject_Internal(mainObject); assert(outMainObjectIdx != InvalidMainObjectIdx); } @@ -638,6 +642,12 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex } } +const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const +{ + assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption()); + return resourcesGPUBuffer->getSize() - resourcesCollection.calculateTotalConsumption(); +} + void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex) { finalizeAllCopiesToGPU(intendedNextSubmit); @@ -645,97 +655,61 @@ void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& // We reset Geometry Counters (drawObj+geometryInfos) because we're done rendering previous geometry // We don't reset counters for styles because we will be reusing them - resetGeometryCounters(); - -#if 1 - if (mainObjectIndex < maxMainObjects) - { - // Check if user is following proper usage, mainObjectIndex should be the last mainObj added before an autosubmit, because this is the only mainObj we want to maintain. - // See comments on`addMainObject_SubmitIfNeeded` function - // TODO: consider forcing this by not expose mainObjectIndex to user and keep track of a "currentMainObj" (?) 
- _NBL_DEBUG_BREAK_IF(mainObjectIndex != (currentMainObjectCount - 1u)); - - // If the clip projection stack is non-empty, then it means we need to re-push the clipProjectionData (because it existed in geometry data and it was erased) - uint64_t newClipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - // only re-upload mainObjData if it's clipProjectionAddress was changed - if (newClipProjectionAddress != getMainObject(mainObjectIndex)->clipProjectionAddress) - { - // then modify the mainObject data - getMainObject(mainObjectIndex)->clipProjectionAddress = newClipProjectionAddress; - // we need to rewind back inMemMainObjectCount to this mainObjIndex so it re-uploads the current mainObject (because we modified it) - inMemMainObjectCount = core::min(inMemMainObjectCount, mainObjectIndex); - } - } - - // TODO: Consider resetting MainObjects here as well and addMainObject for the new data again, but account for the fact that mainObjectIndex now changed (either change through uint32_t& or keeping track of "currentMainObj" in drawResourcesFiller -#else - resetMainObjectCounters(); - - // If there is a mainObject data we need to maintain and keep it's clipProjectionAddr valid - if (mainObjectIndex < maxMainObjects) - { - MainObject mainObjToMaintain = *getMainObject(mainObjectIndex); - - // If the clip projection stack is non-empty, then it means we need to re-push the clipProjectionData (because it exists in geometry data and it was reset) - // `acquireCurrentClipProjectionAddress` shouldn't/won't trigger auto-submit because geometry buffer counters were reset and our geometry buffer is supposed to be larger than a single clipProjectionData - mainObjToMaintain->clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - - // We're calling `addMainObject_Internal` instead of safer `addMainObject_SubmitIfNeeded` because we've reset our mainObject and we're sure this won't need an autoSubmit. 
- addMainObject_Internal(mainObjToMaintain); - } -#endif + resetDrawObjects(); } uint32_t DrawResourcesFiller::addMainObject_Internal(const MainObject& mainObject) { - MainObject* mainObjsArray = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()); - - if (currentMainObjectCount >= MaxIndexableMainObjects) + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t memRequired = sizeof(MainObject); + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? + if (!enoughMem) return InvalidMainObjectIdx; - if (currentMainObjectCount >= maxMainObjects) + if (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects) return InvalidMainObjectIdx; - - void* dst = mainObjsArray + currentMainObjectCount; - memcpy(dst, &mainObject, sizeof(MainObject)); - uint32_t ret = currentMainObjectCount; - currentMainObjectCount++; - return ret; + resourcesCollection.mainObjects.vector.push_back(mainObject); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.mainObjects.vector.size() - 1u; } uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineStyleInfo) { + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const bool enoughMem = remainingResourcesSize >= sizeof(LineStyle); // enough remaining memory for 1 more linestyle? + if (!enoughMem) + return InvalidStyleIdx; + // TODO: Additionally constraint by a max size? 
and return InvalidIdx if it would exceed + + LineStyle gpuLineStyle = lineStyleInfo.getAsGPUData(); _NBL_DEBUG_BREAK_IF(gpuLineStyle.stipplePatternSize > LineStyle::StipplePatternMaxSize); // Oops, even after style normalization the style is too long to be in gpu mem :( - LineStyle* stylesArray = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer()); - for (uint32_t i = 0u; i < currentLineStylesCount; ++i) + for (uint32_t i = 0u; i < resourcesCollection.lineStyles.vector.size(); ++i) { - const LineStyle& itr = stylesArray[i]; - + const LineStyle& itr = resourcesCollection.lineStyles.vector[i]; if (itr == gpuLineStyle) return i; } - if (currentLineStylesCount >= maxLineStyles) - return InvalidStyleIdx; - - void* dst = stylesArray + currentLineStylesCount; - memcpy(dst, &gpuLineStyle, sizeof(LineStyle)); - return currentLineStylesCount++; + resourcesCollection.lineStyles.vector.push_back(gpuLineStyle); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.lineStyles.vector.size() - 1u; } uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t maxMemRequired = sizeof(DTMSettings) + 2 * sizeof(LineStyle); + const bool enoughMem = remainingResourcesSize >= maxMemRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? + + if (!enoughMem) + return InvalidDTMSettingsIdx; + // TODO: Additionally constraint by a max size? 
and return InvalidIdx if it would exceed + DTMSettings dtmSettings; dtmSettings.contourLinesStartHeight = dtmSettingsInfo.contourLinesStartHeight; dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourLinesEndHeight; dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourLinesHeightInterval; - if (currentLineStylesCount + 2 > maxLineStyles) - return InvalidDTMSettingsIdx; - - assert(currentLineStylesCount + 2 <= maxLineStyles); - dtmSettings.outlineLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.outlineLineStyleInfo, intendedNextSubmit); - dtmSettings.contourLineStyleIdx = addLineStyle_SubmitIfNeeded(dtmSettingsInfo.contourLineStyleInfo, intendedNextSubmit); + dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineLineStyleInfo); + dtmSettings.contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourLineStyleInfo); switch (dtmSettingsInfo.heightShadingMode) { @@ -751,25 +725,18 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm } _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); - if (currentDTMSettingsCount >= maxDtmSettings) - return InvalidDTMSettingsIdx; - - DTMSettings* settingsArray = reinterpret_cast(cpuDrawBuffers.dtmSettingsBuffer->getPointer()); - for (uint32_t i = 0u; i < currentDTMSettingsCount; ++i) + for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) { - const DTMSettings& itr = settingsArray[i]; + const DTMSettings& itr = resourcesCollection.dtmSettings.vector[i]; if (itr == dtmSettings) return i; } - - void* dst = settingsArray + currentDTMSettingsCount; - memcpy(dst, &dtmSettings, sizeof(DTMSettings)); - return currentDTMSettingsCount++; - - return InvalidDTMSettingsIdx; + + resourcesCollection.dtmSettings.vector.push_back(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return 
resourcesCollection.dtmSettings.vector.size() - 1u; } -uint64_t DrawResourcesFiller::acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit) { if (clipProjectionAddresses.empty()) return InvalidClipProjectionAddress; @@ -780,35 +747,26 @@ uint64_t DrawResourcesFiller::acquireCurrentClipProjectionAddress(SIntendedSubmi return clipProjectionAddresses.back(); } -uint64_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit) { - uint64_t outClipProjectionAddress = addClipProjectionData_Internal(clipProjectionData); - if (outClipProjectionAddress == InvalidClipProjectionAddress) + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t memRequired = sizeof(ClipProjectionData); + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? 
+ + if (!enoughMem) { finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - - resetGeometryCounters(); - resetMainObjectCounters(); - - outClipProjectionAddress = addClipProjectionData_Internal(clipProjectionData); - assert(outClipProjectionAddress != InvalidClipProjectionAddress); + + // resets itself + resetCustomClipProjections(); + // resets higher level resources + resetMainObjects(); + resetDrawObjects(); } - return outClipProjectionAddress; -} - -uint64_t DrawResourcesFiller::addClipProjectionData_Internal(const ClipProjectionData& clipProjectionData) -{ - const uint64_t maxGeometryBufferClipProjData = (maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ClipProjectionData); - if (maxGeometryBufferClipProjData <= 0) - return InvalidClipProjectionAddress; - uint8_t* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dst, &clipProjectionData, sizeof(ClipProjectionData)); - - const uint64_t ret = currentGeometryBufferSize + drawResourcesBDA; - currentGeometryBufferSize += sizeof(ClipProjectionData); - return ret; + resourcesCollection.clipProjections.vector.push_back(clipProjectionData); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.clipProjections.vector.size() - 1u; } void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) @@ -865,6 +823,10 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const assert(section.count >= 1u); assert(section.type == ObjectType::LINE); + + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + // how many lines? 
--> memRequired = sizeof(LinePointInfo) + sizeof(LinePointInfo)*lineCount + sizeof(DrawObject)*lineCount + sizeof(uint32_t) * 6u * lineCount + const uint32_t maxGeometryBufferPoints = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(LinePointInfo)); const uint32_t maxGeometryBufferLines = (maxGeometryBufferPoints <= 1u) ? 0u : maxGeometryBufferPoints - 1u; @@ -889,6 +851,8 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const drawObj.geometryAddress += sizeof(LinePointInfo); } + // TODO: Add index buffer, + // Add Geometry if (objectsToUpload > 0u) { @@ -1049,7 +1013,6 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor // If we reset main objects will cause an auto submission bug, where adding an msdf texture while constructing glyphs will have wrong main object references (See how SingleLineTexts add Glyphs with a single mainObject) // for the same reason we don't reset line styles - // `submitCurrentObjectsAndReset` function handles the above + updating clipProjectionData and making sure the mainObjectIdx references to the correct clipProj data after reseting geometry buffer submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); } else diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index a8010a7ec..19579449a 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -28,7 +28,8 @@ struct DrawResourcesFiller { public: - static constexpr size_t BDALoadAlignment = 8u; + // We pack multiple data types in a single buffer, we need to makes sure each offset starts aligned to avoid mis-aligned accesses + static constexpr size_t ResourcesMaxNaturalAlignment = 8u; /// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources struct ResourceBase @@ -37,7 +38,7 @@ struct DrawResourcesFiller size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued virtual size_t getCount() const = 0; 
virtual size_t getStorageSize() const = 0; - virtual size_t getAlignedStorageSize() const { core::alignUp(getStorageSize(), BDALoadAlignment); } + virtual size_t getAlignedStorageSize() const { core::alignUp(getStorageSize(), ResourcesMaxNaturalAlignment); } }; /// @brief ResourceBase reserved for compute shader stages input/output @@ -61,7 +62,8 @@ struct DrawResourcesFiller /// @brief struct to hold all resources struct ResourcesCollection { - // auto-submission level 0 buffers (settings that mainObj references) + // auto-submission level 0 resources (settings that mainObj references) + // Not enough VRAM available to serve adding one of the level 0 resources: they clear themselves and everything from higher levels after doing submission CPUGeneratedResource lineStyles; CPUGeneratedResource dtmSettings; CPUGeneratedResource clipProjections; @@ -71,10 +73,10 @@ struct DrawResourcesFiller // auto-submission level 2 buffers CPUGeneratedResource drawObjects; - CPUGeneratedResource indexBuffer; + CPUGeneratedResource indexBuffer; // this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom geometries, etc - // Get Total memory consumption, If all ResourcesCollection get packed together with BDALoadAlignment + // Get Total memory consumption, If all ResourcesCollection get packed together with ResourcesMaxNaturalAlignment // used to decide when to overflow size_t calculateTotalConsumption() const { @@ -101,7 +103,7 @@ struct DrawResourcesFiller { // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) - return core::alignUp(sizeof(LineStyle) * 2u + sizeof(DTMSettings) + sizeof(ClipProjectionData) + 
sizeof(MainObject) + sizeof(DrawObject) + 512ull, BDALoadAlignment); + return core::alignUp(sizeof(LineStyle) * 2u + sizeof(DTMSettings) + sizeof(ClipProjectionData) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, ResourcesMaxNaturalAlignment); } void allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size); @@ -169,10 +171,10 @@ struct DrawResourcesFiller void reset() { - resetGeometryCounters(); - resetMainObjectCounters(); - resetLineStyleCounters(); - resetDTMSettingsCounters(); + resetDrawObjects(); + resetMainObjects(); + resetLineStyles(); + resetDTMSettings(); } /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders @@ -227,13 +229,13 @@ struct DrawResourcesFiller bool finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit); - // Internal Function to call whenever we overflow while filling our buffers with geometry (potential limiters: indexBuffer, drawObjectsBuffer or geometryBuffer) - // ! mainObjIdx: is the mainObject the "overflowed" drawObjects belong to. - // mainObjIdx is required to ensure that valid data, especially the `clipProjectionData`, remains linked to the main object. - // This is important because, while other data may change during overflow handling, the main object must persist to maintain consistency throughout rendering all parts of it. (for example all lines and beziers of a single polyline) - // [ADVANCED] If you have not created your mainObject yet, pass `InvalidMainObjectIdx` (See drawHatch) + const size_t calculateRemainingResourcesSize() const; + + // Internal Function to call whenever we overflow when we can't fill all of mainObject's drawObjects void submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex); + /// @return index to added main object. 
+ /// It will return `InvalidMainObjectIndex` if it there isn't enough remaining resources memory OR the index would exceed MaxIndexableMainObjects uint32_t addMainObject_Internal(const MainObject& mainObject); uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); @@ -242,11 +244,9 @@ struct DrawResourcesFiller // Gets the current clip projection data (the top of stack) gpu addreess inside the geometryBuffer // If it's been invalidated then it will request to upload again with a possible auto-submit on low geometry buffer memory. - uint64_t acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit); + uint32_t acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit); - uint64_t addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit); - - uint64_t addClipProjectionData_Internal(const ClipProjectionData& clipProjectionData); + uint32_t addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit); static constexpr uint32_t getCageCountPerPolylineObject(ObjectType type) { @@ -269,44 +269,32 @@ struct DrawResourcesFiller bool addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); - void resetMainObjectCounters() + void resetMainObjects() { - inMemMainObjectCount = 0u; - currentMainObjectCount = 0u; + resourcesCollection.mainObjects.vector.clear(); } - // WARN: If you plan to use this, make sure you either reset the mainObjectCounters as well - // Or if you want to keep your mainObject around, make sure you're using the `submitCurrentObjectsAndReset` function instead of calling this directly - // So that it makes your mainObject point to the correct clipProjectionData (which exists in the geometry buffer) - void resetGeometryCounters() + // these resources are data related to chunks of a whole mainObject + void resetDrawObjects() { - inMemDrawObjectCount = 0u; - 
currentDrawObjectCount = 0u; - - inMemGeometryBufferSize = 0u; - currentGeometryBufferSize = 0u; - - // Invalidate all the clip projection addresses because geometry buffer got reset - for (auto& clipProjAddr : clipProjectionAddresses) - clipProjAddr = InvalidClipProjectionAddress; + resourcesCollection.drawObjects.vector.clear(); + resourcesCollection.indexBuffer.vector.clear(); + resourcesCollection.geometryInfo.vector.clear(); } - void resetLineStyleCounters() + void resetCustomClipProjections() { - currentLineStylesCount = 0u; - inMemLineStylesCount = 0u; + resourcesCollection.clipProjections.vector.clear(); } - void resetDTMSettingsCounters() + void resetLineStyles() { - currentDTMSettingsCount = 0u; - inMemDTMSettingsCount = 0u; + resourcesCollection.lineStyles.vector.clear(); } - MainObject* getMainObject(uint32_t idx) + void resetDTMSettings() { - MainObject* mainObjsArray = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()); - return &mainObjsArray[idx]; + resourcesCollection.dtmSettings.vector.clear(); } // MSDF Hashing and Caching Internal Functions diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 020b7cf6c..6784e6b70 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -289,7 +289,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue()); size_t bufferSize = 512u * 1024u * 1024u; // 512 MB - drawResourcesFiller.allocateDrawResourcesBuffer(m_device.get(), bufferSize); + drawResourcesFiller.allocateResourcesBuffer(m_device.get(), bufferSize); drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); { From 2abd6b970c2394a324f747ab6bc432ba5c9fa7a6 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 02:57:47 +0330 Subject: [PATCH 019/129] compiles but probably has runtime errors --- 62_CAD/DrawResourcesFiller.cpp | 208 ++++++++++-------- 62_CAD/DrawResourcesFiller.h | 
40 ++-- 62_CAD/main.cpp | 4 +- .../shaders/main_pipeline/vertex_shader.hlsl | 48 ++-- 4 files changed, 168 insertions(+), 132 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 1eff63552..80ddc0d57 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -15,33 +15,6 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) submitDraws = func; } -//void DrawResourcesFiller::allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t maxIndices) -//{ -// maxIndexCount = maxIndices; -// const size_t indexBufferSize = maxIndices * sizeof(index_buffer_type); -// auto indexBuffer = ICPUBuffer::create({ indexBufferSize }); -// -// index_buffer_type* indices = reinterpret_cast(indexBuffer->getPointer()); -// for (uint32_t i = 0u; i < maxIndices / 6u; ++i) -// { -// index_buffer_type objIndex = i; -// indices[i * 6] = objIndex * 4u + 1u; -// indices[i * 6 + 1u] = objIndex * 4u + 0u; -// indices[i * 6 + 2u] = objIndex * 4u + 2u; -// -// indices[i * 6 + 3u] = objIndex * 4u + 1u; -// indices[i * 6 + 4u] = objIndex * 4u + 2u; -// indices[i * 6 + 5u] = objIndex * 4u + 3u; -// } -// -// IGPUBuffer::SCreationParams indexBufferCreationParams = {}; -// indexBufferCreationParams.size = indexBufferSize; -// indexBufferCreationParams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; -// -// m_utilities->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue=m_copyQueue}, std::move(indexBufferCreationParams), indices).move_into(gpuDrawBuffers.indexBuffer); -// gpuDrawBuffers.indexBuffer->setObjectDebugName("indexBuffer"); -//} - void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { size = core::alignUp(size, ResourcesMaxNaturalAlignment); @@ -146,6 +119,8 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); } + 
return; // TODO: Remove + if (!polyline.getConnectors().empty()) { uint32_t currentConnectorPolylineObject = 0u; @@ -176,19 +151,17 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM { // NOTE[ERFAN]: these push contants will be removed, everything will be accessed by dtmSettings, including where the vertex buffer data resides - auto& geometryBytesVector = resourcesCollection.geometryInfo.vector; - size_t geometryBufferOffset = core::alignUp(geometryBytesVector.size(), ResourcesMaxNaturalAlignment); - geometryBytesVector.resize(geometryBufferOffset + dataToAddByteSize); // this will increase total resources consumption and reduce remainingResourcesSize --> no need to update any size trackers // Copy VertexBuffer - void* dst = geometryBytesVector.data() + geometryBufferOffset; + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(dataToAddByteSize, alignof(CTriangleMesh::vertex_t)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; // the actual bda address will be determined only after all copies are finalized, later we will do += `baseBDAAddress + geometryInfo.bufferOffset` drawData.pushConstants.triangleMeshVerticesBaseAddress = geometryBufferOffset; memcpy(dst, mesh.getVertices().data(), vtxBuffByteSize); geometryBufferOffset += vtxBuffByteSize; // Copy IndexBuffer - dst = geometryBytesVector.data() + geometryBufferOffset; + dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; drawData.indexBufferOffset = geometryBufferOffset; memcpy(dst, mesh.getIndices().data(), indexBuffByteSize); geometryBufferOffset += indexBuffByteSize; @@ -227,6 +200,7 @@ void DrawResourcesFiller::drawHatch( const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) { + return; // TODO: Remove if (color.a == 0.0f) // not visible return; @@ -271,6 +245,7 @@ void DrawResourcesFiller::drawFontGlyph( uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) { 
+#if 0 uint32_t textureIdx = InvalidTextureIdx; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); textureIdx = getMSDFIndexFromInputInfo(msdfInput, intendedNextSubmit); @@ -293,11 +268,13 @@ void DrawResourcesFiller::drawFontGlyph( // TODO: Log, probably getGlyphMSDF(face,glyphIdx) returned nullptr ICPUImage ptr _NBL_DEBUG_BREAK_IF(true); } +#endif } // TODO: FIX void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) { +#if 0 auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool { const uint32_t maxGeometryBufferImageObjects = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ImageObjectInfo)); @@ -340,6 +317,7 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 bool success = addImageObject_Internal(info, mainObjIdx); assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object } +#endif } bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) @@ -403,7 +381,7 @@ uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, ui MainObject mainObject = {}; mainObject.styleIdx = styleIdx; mainObject.dtmSettingsIdx = dtmSettingsIdx; - mainObject.clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); + mainObject.clipProjectionIndex = acquireCurrentClipProjectionIndex(intendedNextSubmit); uint32_t outMainObjectIdx = addMainObject_Internal(mainObject); if (outMainObjectIdx == InvalidMainObjectIdx) { @@ -428,7 +406,7 @@ uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, ui void DrawResourcesFiller::pushClipProjectionData(const ClipProjectionData& clipProjectionData) { clipProjections.push_back(clipProjectionData); - 
clipProjectionAddresses.push_back(InvalidClipProjectionAddress); + clipProjectionIndices.push_back(InvalidClipProjectionIndex); } void DrawResourcesFiller::popClipProjectionData() @@ -437,7 +415,7 @@ void DrawResourcesFiller::popClipProjectionData() return; clipProjections.pop_back(); - clipProjectionAddresses.pop_back(); + clipProjectionIndices.pop_back(); } bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit) @@ -736,15 +714,15 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm return resourcesCollection.dtmSettings.vector.size() - 1u; } -uint32_t DrawResourcesFiller::acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::acquireCurrentClipProjectionIndex(SIntendedSubmitInfo& intendedNextSubmit) { - if (clipProjectionAddresses.empty()) - return InvalidClipProjectionAddress; + if (clipProjectionIndices.empty()) + return InvalidClipProjectionIndex; - if (clipProjectionAddresses.back() == InvalidClipProjectionAddress) - clipProjectionAddresses.back() = addClipProjectionData_SubmitIfNeeded(clipProjections.back(), intendedNextSubmit); + if (clipProjectionIndices.back() == InvalidClipProjectionIndex) + clipProjectionIndices.back() = addClipProjectionData_SubmitIfNeeded(clipProjections.back(), intendedNextSubmit); - return clipProjectionAddresses.back(); + return clipProjectionIndices.back(); } uint32_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit) @@ -779,19 +757,32 @@ void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyl assert(false); // we don't handle other object types } +// TODO: FIX void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx) { - const uint32_t maxGeometryBufferConnectors = static_cast((maxGeometryBufferSize - 
currentGeometryBufferSize) / sizeof(PolylineConnector)); - - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferConnectors); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); +#if 0 + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(PolylineConnector) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + const uint32_t connectorCount = static_cast(polyline.getConnectors().size()); const uint32_t remainingObjects = connectorCount - currentPolylineConnectorObj; - const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + if (objectsToUpload <= 0u) + return; + + + + + + // TODO: + + + + + // Add DrawObjs DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; @@ -816,6 +807,7 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po } currentPolylineConnectorObj += objectsToUpload; +#endif } void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) @@ -825,94 +817,120 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const const size_t remainingResourcesSize = calculateRemainingResourcesSize(); - // how many lines? --> memRequired = sizeof(LinePointInfo) + sizeof(LinePointInfo)*lineCount + sizeof(DrawObject)*lineCount + sizeof(uint32_t) * 6u * lineCount + if (remainingResourcesSize < sizeof(LinePointInfo)) + return; - const uint32_t maxGeometryBufferPoints = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(LinePointInfo)); - const uint32_t maxGeometryBufferLines = (maxGeometryBufferPoints <= 1u) ? 
0u : maxGeometryBufferPoints - 1u; - - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferLines); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); + // how many lines fit into mem? --> memConsumption = sizeof(LinePointInfo) + sizeof(LinePointInfo)*lineCount + sizeof(DrawObject)*lineCount + sizeof(uint32_t) * 6u * lineCount + const uint32_t uploadableObjects = (remainingResourcesSize - sizeof(LinePointInfo)) / (sizeof(LinePointInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem const uint32_t lineCount = section.count; const uint32_t remainingObjects = lineCount - currentObjectInSection; - uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto pointsByteSize = sizeof(LinePointInfo) * (objectsToUpload + 1u); + + + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(pointsByteSize, alignof(LinePointInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const LinePointInfo& linePoint = polyline.getLinePointAt(section.index + currentObjectInSection); + memcpy(dst, &linePoint, pointsByteSize); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = i*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = i*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = i*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = i*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = i*4u + 2u; + indexBufferToBeFilled[i*6 + 
5u] = i*4u + 3u; + } // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::LINE) | 0 << 16); - drawObj.geometryAddress = drawResourcesBDA + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(LinePointInfo); - } - - // TODO: Add index buffer, - - // Add Geometry - if (objectsToUpload > 0u) - { - const auto pointsByteSize = sizeof(LinePointInfo) * (objectsToUpload + 1u); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& linePoint = polyline.getLinePointAt(section.index + currentObjectInSection); - memcpy(dst, &linePoint, pointsByteSize); - currentGeometryBufferSize += pointsByteSize; - } + } currentObjectInSection += objectsToUpload; } void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) { - constexpr uint32_t CagesPerQuadBezier = getCageCountPerPolylineObject(ObjectType::QUAD_BEZIER); + constexpr uint32_t CagesPerQuadBezier = 3u; // TODO: Break into 3 beziers in compute shader. + assert(section.type == ObjectType::QUAD_BEZIER); - const uint32_t maxGeometryBufferBeziers = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(QuadraticBezierInfo)); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + // how many quad bezier objects fit into mem? 
+ // memConsumption = quadBezCount * (sizeof(QuadraticBezierInfo) + 3*(sizeof(DrawObject)+6u*sizeof(uint32_t)) + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(QuadraticBezierInfo) + (sizeof(DrawObject) + 6u * sizeof(uint32_t)) * CagesPerQuadBezier); + // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferBeziers); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects /= CagesPerQuadBezier; - const uint32_t beziersCount = section.count; const uint32_t remainingObjects = beziersCount - currentObjectInSection; - uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t cagesCount = objectsToUpload * CagesPerQuadBezier; + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(beziersByteSize, alignof(QuadraticBezierInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const QuadraticBezierInfo& quadBezier = polyline.getQuadBezierInfoAt(section.index + currentObjectInSection); + memcpy(dst, &quadBezier, beziersByteSize); + + + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u*cagesCount); + for (uint32_t i = 0u; i < cagesCount; ++i) + { + indexBufferToBeFilled[i*6] = i*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = i*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = i*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = i*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = i*4u + 2u; 
+ indexBufferToBeFilled[i*6 + 5u] = i*4u + 3u; + } + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(cagesCount); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = drawResourcesBDA + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { for (uint16_t subObject = 0; subObject < CagesPerQuadBezier; subObject++) { drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::QUAD_BEZIER) | (subObject << 16)); - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i * CagesPerQuadBezier + subObject] = drawObj; } drawObj.geometryAddress += sizeof(QuadraticBezierInfo); } - // Add Geometry - if (objectsToUpload > 0u) - { - const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& quadBezier = polyline.getQuadBezierInfoAt(section.index + currentObjectInSection); - memcpy(dst, &quadBezier, beziersByteSize); - currentGeometryBufferSize += beziersByteSize; - } currentObjectInSection += objectsToUpload; } +// TODO: FIX void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex) { +#if 0 const uint32_t maxGeometryBufferHatchBoxes = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(Hatch::CurveHatchBox)); uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; @@ -946,10 +964,13 @@ void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& curren // Add Indices currentDrawObjectCount += uploadableObjects; currentObjectInSection += uploadableObjects; +#endif } +// TODO: FIX bool 
DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx) { +#if 0 const uint32_t maxGeometryBufferFontGlyphs = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(GlyphInfo)); uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; @@ -977,6 +998,7 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint { return false; } +#endif } void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 19579449a..439c7cfaf 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -57,6 +57,29 @@ struct DrawResourcesFiller core::vector vector; size_t getCount() const { return vector.size(); } size_t getStorageSize() const { return vector.size() * sizeof(T); } + + /// @return pointer to start of the data to be filled, up to additionalCount + T* increaseCountAndGetPtr(size_t additionalCount) + { + size_t offset = vector.size(); + vector.resize(offset + additionalCount); + return &vector[offset]; + } + + /// @brief increases size of general-purpose resources that hold bytes + /// @param additionalSize + /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= ResourcesMaxNaturalAlignment, only use this if storing raw bytes in vector + /// @return pointer to start of the data to be filled, up to additional size + // TODO: make sure t is 1 byte with templates. 
+ size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) + { + assert(core::isPoT(alignment) && alignment <= ResourcesMaxNaturalAlignment); + size_t offset = core::alignUp(vector.size(), alignment); + vector.resize(offset + additionalSize); + return offset; + } + + T* data() { return vector.data(); } }; /// @brief struct to hold all resources @@ -99,7 +122,7 @@ struct DrawResourcesFiller void setSubmitDrawsFunction(const SubmitFunc& func); /// @brief Get minimum required size for resources buffer (containing objects and geometry info and their settings) - consteval size_t getMinimumRequiredResourcesBufferSize() const + static constexpr size_t getMinimumRequiredResourcesBufferSize() { // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) @@ -178,7 +201,7 @@ struct DrawResourcesFiller } /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders - const ResourcesCollection& getResourcesCollection() const { return &resourcesCollection; } + const ResourcesCollection& getResourcesCollection() const { return resourcesCollection; } /// @brief buffer containing all non-texture type resources nbl::core::smart_refctd_ptr getResourcesGPUBuffer() const { return resourcesGPUBuffer; } @@ -244,19 +267,10 @@ struct DrawResourcesFiller // Gets the current clip projection data (the top of stack) gpu addreess inside the geometryBuffer // If it's been invalidated then it will request to upload again with a possible auto-submit on low geometry buffer memory. 
- uint32_t acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit); + uint32_t acquireCurrentClipProjectionIndex(SIntendedSubmitInfo& intendedNextSubmit); uint32_t addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit); - static constexpr uint32_t getCageCountPerPolylineObject(ObjectType type) - { - if (type == ObjectType::LINE) - return 1u; - else if (type == ObjectType::QUAD_BEZIER) - return 3u; - return 0u; - }; - void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); @@ -397,7 +411,7 @@ struct DrawResourcesFiller IQueue* m_copyQueue; std::deque clipProjections; // stack of clip projectios stored so we can resubmit them if geometry buffer got reset. - std::deque clipProjectionAddresses; // stack of clip projection gpu addresses in geometry buffer. to keep track of them in push/pops + std::deque clipProjectionIndices; // stack of clip projection gpu addresses in geometry buffer. 
to keep track of them in push/pops // MSDF GetGlyphMSDFTextureFunc getGlyphMSDF; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 6784e6b70..1f83244d6 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1193,10 +1193,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu globalData.pointers = { .lineStyles = baseAddress + resources.lineStyles.bufferOffset, .dtmSettings = baseAddress + resources.dtmSettings.bufferOffset, - .customClipProjections = baseAddress + resources.customClipProjections.bufferOffset, + .customClipProjections = baseAddress + resources.clipProjections.bufferOffset, .mainObjects = baseAddress + resources.mainObjects.bufferOffset, .drawObjects = baseAddress + resources.drawObjects.bufferOffset, - .geometryBuffer = baseAddress + resources.geometryBuffer.bufferOffset, + .geometryBuffer = baseAddress + resources.geometryInfo.bufferOffset, }; globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 5abe693ec..5dbe386fd 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -173,11 +173,11 @@ PSInput main(uint vertexID : SV_VertexID) if (objType == ObjectType::LINE) { pfloat64_t2 points[2u]; - points[0u] = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - points[1u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(LinePointInfo), 8u); + points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(LinePointInfo), 8u); - const float phaseShift = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float patternStretch = vk::RawBufferLoad(drawObj.geometryAddress + 
sizeof(pfloat64_t2) + sizeof(float), 8u); + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float), 8u); outV.setCurrentPhaseShift(phaseShift); outV.setPatternStretch(patternStretch); @@ -213,12 +213,12 @@ PSInput main(uint vertexID : SV_VertexID) else if (objType == ObjectType::QUAD_BEZIER) { pfloat64_t2 points[3u]; - points[0u] = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - points[1u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - points[2u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); + points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + points[2u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); - const float phaseShift = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); - const float patternStretch = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); outV.setCurrentPhaseShift(phaseShift); outV.setPatternStretch(patternStretch); @@ -387,9 +387,9 @@ PSInput main(uint vertexID : SV_VertexID) if (lineStyle.isRoadStyleFlag) { - const pfloat64_t2 circleCenter = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - const float2 v = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float 
cosHalfAngleBetweenNormals = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); + const pfloat64_t2 circleCenter = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + const float2 v = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float cosHalfAngleBetweenNormals = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); const float2 circleCenterScreenSpace = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, circleCenter); outV.setPolylineConnectorCircleCenter(circleCenterScreenSpace); @@ -449,13 +449,13 @@ PSInput main(uint vertexID : SV_VertexID) else if (objType == ObjectType::CURVE_BOX) { CurveBox curveBox; - curveBox.aabbMin = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - curveBox.aabbMax = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + curveBox.aabbMin = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + curveBox.aabbMax = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); for (uint32_t i = 0; i < 3; i ++) { - curveBox.curveMin[i] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); - curveBox.curveMax[i] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); + curveBox.curveMin[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); + curveBox.curveMax[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); } pfloat64_t2 aabbMaxXMinY; @@ -540,10 +540,10 @@ PSInput main(uint vertexID : SV_VertexID) const float italicTiltSlope = 
lineStyle.screenSpaceLineWidth; // aliased text style member with line style GlyphInfo glyphInfo; - glyphInfo.topLeft = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - glyphInfo.dirU = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - glyphInfo.aspectRatio = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + glyphInfo.topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + glyphInfo.dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + glyphInfo.aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); float32_t2 minUV = glyphInfo.getMinUV(); uint16_t textureID = glyphInfo.getTextureID(); @@ -591,10 +591,10 @@ PSInput main(uint vertexID : SV_VertexID) } else if (objType == ObjectType::IMAGE) { - pfloat64_t2 topLeft = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - float32_t2 dirU = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - float32_t aspectRatio = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - uint32_t textureID = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 
sizeof(pfloat64_t2) + sizeof(float2), 4u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); From ab8f303216552dec39ac1e01aa3199043f3a82d7 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 02:59:42 +0330 Subject: [PATCH 020/129] [62.CAD] compile error fix --- 62_CAD/DrawResourcesFiller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 439c7cfaf..85d88f2eb 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -38,7 +38,7 @@ struct DrawResourcesFiller size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued virtual size_t getCount() const = 0; virtual size_t getStorageSize() const = 0; - virtual size_t getAlignedStorageSize() const { core::alignUp(getStorageSize(), ResourcesMaxNaturalAlignment); } + virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), ResourcesMaxNaturalAlignment); } }; /// @brief ResourceBase reserved for compute shader stages input/output From 8f2ae9c51c2e47dc152d4081ea0a1ec495940fd7 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 03:01:58 +0330 Subject: [PATCH 021/129] small shader fixes --- 62_CAD/shaders/globals.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index f9c89d45c..93578b7b6 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -436,7 +436,7 @@ LineStyle loadLineStyle(const uint32_t index) } DTMSettings loadDTMSettings(const uint32_t index) { - return vk::RawBufferLoad(globals.pointers.dtmSettings + index * sizeof(DTMSetting), 8u); + return 
vk::RawBufferLoad(globals.pointers.dtmSettings + index * sizeof(DTMSettings), 8u); } ClipProjectionData loadCustomClipProjection(const uint32_t index) { @@ -444,11 +444,11 @@ ClipProjectionData loadCustomClipProjection(const uint32_t index) } MainObject loadMainObject(const uint32_t index) { - return vk::RawBufferLoad(globals.pointers.mainObjs + index * sizeof(MainObject), 8u); + return vk::RawBufferLoad(globals.pointers.mainObjects + index * sizeof(MainObject), 8u); } DrawObject loadDrawObject(const uint32_t index) { - return vk::RawBufferLoad(globals.pointers.drawObjs + index * sizeof(DrawObject), 8u); + return vk::RawBufferLoad(globals.pointers.drawObjects + index * sizeof(DrawObject), 8u); } #endif From 97b1693c23a656efa853f0bc055125cc41a913dd Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 03:06:16 +0330 Subject: [PATCH 022/129] more fixes --- 62_CAD/main.cpp | 32 ++------------------------- 62_CAD/shaders/geotexture/common.hlsl | 2 +- 2 files changed, 3 insertions(+), 31 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 1f83244d6..ed3bdf88c 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -674,48 +674,20 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }, { .binding = 1u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 2u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 3u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = 
asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 4u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 5u, .type = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 6u, + .binding = 2u, .type = asset::IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 7u, + .binding = 3u, .type = asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = bindlessTextureFlags, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl index 82a646319..691cd3d3b 100644 --- a/62_CAD/shaders/geotexture/common.hlsl +++ b/62_CAD/shaders/geotexture/common.hlsl @@ -25,7 +25,7 @@ struct PSInput [[vk::push_constant]] GeoTextureOBB geoTextureOBB; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); +// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] Texture2D geoTexture : register(t0); From 76360625346f5eb4c0232ff9e93eed078f37fa63 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 03:55:34 +0330 Subject: [PATCH 023/129] fixes to cad example --- 62_CAD/Polyline.h | 4 +- 
62_CAD/main.cpp | 76 +++++++++---------- 62_CAD/shaders/globals.hlsl | 2 +- .../shaders/main_pipeline/vertex_shader.hlsl | 1 + 4 files changed, 41 insertions(+), 42 deletions(-) diff --git a/62_CAD/Polyline.h b/62_CAD/Polyline.h index 03b2f2c30..bee5650c7 100644 --- a/62_CAD/Polyline.h +++ b/62_CAD/Polyline.h @@ -66,8 +66,6 @@ struct LineStyleInfo rigidSegmentIdx = InvalidRigidSegmentIndex; phaseShift = 0.0f; - assert(stipplePatternUnnormalizedRepresentation.size() <= StipplePatternMaxSize); - if (stipplePatternUnnormalizedRepresentation.size() == 0) { stipplePatternSize = 0; @@ -110,6 +108,8 @@ struct LineStyleInfo stipplePatternTransformed[0] += stipplePatternTransformed[stipplePatternTransformed.size() - 1]; stipplePatternTransformed.pop_back(); } + + assert(stipplePatternTransformed.size() <= StipplePatternMaxSize); if (stipplePatternTransformed.size() != 1) { diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index ed3bdf88c..5f9e88694 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -867,14 +867,14 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto mainPipelineFragmentCpuShader = loadCompileShader("../shaders/main_pipeline/fragment.hlsl", IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY); auto mainPipelineVertexCpuShader = loadCompileShader("../shaders/main_pipeline/vertex_shader.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); - auto geoTexturePipelineVertCpuShader = loadCompileShader(GeoTextureRenderer::VertexShaderRelativePath, IShader::E_SHADER_STAGE::ESS_VERTEX); - auto geoTexturePipelineFragCpuShader = loadCompileShader(GeoTextureRenderer::FragmentShaderRelativePath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); + // auto geoTexturePipelineVertCpuShader = loadCompileShader(GeoTextureRenderer::VertexShaderRelativePath, IShader::E_SHADER_STAGE::ESS_VERTEX); + // auto geoTexturePipelineFragCpuShader = loadCompileShader(GeoTextureRenderer::FragmentShaderRelativePath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); 
mainPipelineFragmentCpuShader->setShaderStage(IShader::E_SHADER_STAGE::ESS_FRAGMENT); mainPipelineFragmentShaders = m_device->createShader({ mainPipelineFragmentCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); mainPipelineVertexShader = m_device->createShader({ mainPipelineVertexCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - geoTexturePipelineShaders[0] = m_device->createShader({ geoTexturePipelineVertCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - geoTexturePipelineShaders[1] = m_device->createShader({ geoTexturePipelineFragCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + // geoTexturePipelineShaders[0] = m_device->createShader({ geoTexturePipelineVertCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + // geoTexturePipelineShaders[1] = m_device->createShader({ geoTexturePipelineFragCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); core::smart_refctd_ptr shaderWriteCacheFile; { @@ -1011,7 +1011,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu ); m_geoTextureRenderer = std::unique_ptr(new GeoTextureRenderer(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger))); - m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); + // m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); // Create the Semaphores m_renderSemaphore = m_device->createSemaphore(0ull); @@ -1155,33 +1155,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // cb->reset(video::IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); // cb->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cb->beginDebugMarker("Frame"); - - float64_t3x3 projectionToNDC; - 
projectionToNDC = m_Camera.constructViewProjection(); - - Globals globalData = {}; - uint64_t baseAddress = drawResourcesFiller.getResourcesGPUBuffer()->getDeviceAddress(); - const auto& resources = drawResourcesFiller.getResourcesCollection(); - globalData.pointers = { - .lineStyles = baseAddress + resources.lineStyles.bufferOffset, - .dtmSettings = baseAddress + resources.dtmSettings.bufferOffset, - .customClipProjections = baseAddress + resources.clipProjections.bufferOffset, - .mainObjects = baseAddress + resources.mainObjects.bufferOffset, - .drawObjects = baseAddress + resources.drawObjects.bufferOffset, - .geometryBuffer = baseAddress + resources.geometryInfo.bufferOffset, - }; - globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; - globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; - globalData.defaultClipProjection.projectionToNDC = projectionToNDC; - globalData.defaultClipProjection.minClipNDC = float32_t2(-1.0, -1.0); - globalData.defaultClipProjection.maxClipNDC = float32_t2(+1.0, +1.0); - auto screenToWorld = getScreenToWorldRatio(globalData.defaultClipProjection.projectionToNDC, globalData.resolution); - globalData.screenToWorldRatio = screenToWorld; - globalData.worldToScreenRatio = (1.0/screenToWorld); - globalData.miterLimit = 10.0f; - SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; - bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); - assert(updateSuccess); nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; auto scRes = static_cast(m_surface->getSwapchainResources()); @@ -1214,8 +1187,36 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; - auto&r = drawResourcesFiller; 
+ const auto& resources = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); + + float64_t3x3 projectionToNDC; + projectionToNDC = m_Camera.constructViewProjection(); + + Globals globalData = {}; + uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); + globalData.pointers = { + .lineStyles = baseAddress + resources.lineStyles.bufferOffset, + .dtmSettings = baseAddress + resources.dtmSettings.bufferOffset, + .customClipProjections = baseAddress + resources.clipProjections.bufferOffset, + .mainObjects = baseAddress + resources.mainObjects.bufferOffset, + .drawObjects = baseAddress + resources.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resources.geometryInfo.bufferOffset, + }; + globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; + globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; + globalData.defaultClipProjection.projectionToNDC = projectionToNDC; + globalData.defaultClipProjection.minClipNDC = float32_t2(-1.0, -1.0); + globalData.defaultClipProjection.maxClipNDC = float32_t2(+1.0, +1.0); + auto screenToWorld = getScreenToWorldRatio(globalData.defaultClipProjection.projectionToNDC, globalData.resolution); + globalData.screenToWorldRatio = screenToWorld; + globalData.worldToScreenRatio = (1.0/screenToWorld); + globalData.miterLimit = 10.0f; + SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; + bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); + assert(updateSuccess); + asset::SViewport vp = { .x = 0u, @@ -1261,8 +1262,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - 
bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; + bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS | PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS; bufferBarrier.range = { .offset = 0u, @@ -1293,9 +1294,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - const auto& resources = drawResourcesFiller.getResourcesCollection(); - const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); - const uint32_t currentIndexCount = resources.drawObjects.getCount() * 6u; IGPUDescriptorSet* descriptorSets[] = { descriptorSet0.get(), descriptorSet1.get() }; @@ -1424,7 +1422,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // We only support one swapchain mode, surface, the other one is Display which we have not implemented yet. 
retval.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE; retval.validations = true; - retval.synchronizationValidation = true; + retval.synchronizationValidation = false; return retval; } protected: diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 93578b7b6..562f523e6 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -444,7 +444,7 @@ ClipProjectionData loadCustomClipProjection(const uint32_t index) } MainObject loadMainObject(const uint32_t index) { - return vk::RawBufferLoad(globals.pointers.mainObjects + index * sizeof(MainObject), 8u); + return vk::RawBufferLoad(globals.pointers.mainObjects + index * sizeof(MainObject), 4u); } DrawObject loadDrawObject(const uint32_t index) { diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 5dbe386fd..b62cbe543 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -152,6 +152,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.setObjType(objType); outV.setMainObjectIdx(drawObj.mainObjIndex); + MainObject mainObj = loadMainObject(drawObj.mainObjIndex); ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); From eec41cba4bb8bab52bf2f2f3a88d2784efc71109 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 04:41:11 +0330 Subject: [PATCH 024/129] fixed hatches and polyline connector's auto submission logic --- 62_CAD/DrawResourcesFiller.cpp | 153 +++++++++++++++++---------------- 62_CAD/DrawResourcesFiller.h | 1 + 2 files changed, 78 insertions(+), 76 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 80ddc0d57..45abd8555 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -436,9 +436,9 @@ bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNext return false; } + drawBuffer.bufferOffset = copyRange.offset; if 
(copyRange.size > 0ull) { - drawBuffer.bufferOffset = copyRange.offset; if (!m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) return false; copiedResourcesSize += drawBuffer.getAlignedStorageSize(); @@ -757,10 +757,8 @@ void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyl assert(false); // we don't handle other object types } -// TODO: FIX void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx) { -#if 0 const size_t remainingResourcesSize = calculateRemainingResourcesSize(); const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(PolylineConnector) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); @@ -773,41 +771,39 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po if (objectsToUpload <= 0u) return; + // Add Geometry + const auto connectorsByteSize = sizeof(PolylineConnector) * objectsToUpload; + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(connectorsByteSize, alignof(PolylineConnector)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const PolylineConnector& connector = polyline.getConnectors()[currentPolylineConnectorObj]; + memcpy(dst, &connector, connectorsByteSize); - - - - // TODO: - - - - + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + 
indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::POLYLINE_CONNECTOR) | 0 << 16); - drawObj.geometryAddress = drawResourcesBDA + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(PolylineConnector); - } - - // Add Geometry - if (objectsToUpload > 0u) - { - const auto connectorsByteSize = sizeof(PolylineConnector) * objectsToUpload; - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& connector = polyline.getConnectors()[currentPolylineConnectorObj]; - memcpy(dst, &connector, connectorsByteSize); - currentGeometryBufferSize += connectorsByteSize; - } + } currentPolylineConnectorObj += objectsToUpload; -#endif } void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) @@ -833,8 +829,6 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const // Add Geometry const auto pointsByteSize = sizeof(LinePointInfo) * (objectsToUpload + 1u); - - size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(pointsByteSize, alignof(LinePointInfo)); void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; const LinePointInfo& linePoint = polyline.getLinePointAt(section.index + currentObjectInSection); @@ -842,14 +836,15 @@ void 
DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const // Push Indices, remove later when compute fills this uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < objectsToUpload; ++i) { - indexBufferToBeFilled[i*6] = i*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = i*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = i*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = i*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = i*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = i*4u + 3u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; } // Add DrawObjs @@ -898,14 +893,15 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, // Push Indices, remove later when compute fills this uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u*cagesCount); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < cagesCount; ++i) { - indexBufferToBeFilled[i*6] = i*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = i*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = i*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = i*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = i*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = i*4u + 3u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 
3u; } // Add DrawObjs @@ -927,50 +923,58 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, currentObjectInSection += objectsToUpload; } -// TODO: FIX void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex) { -#if 0 - const uint32_t maxGeometryBufferHatchBoxes = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(Hatch::CurveHatchBox)); - - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferHatchBoxes); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(Hatch::CurveHatchBox) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + uint32_t remainingObjects = hatch.getHatchBoxCount() - currentObjectInSection; - uploadableObjects = core::min(uploadableObjects, remainingObjects); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); - for (uint32_t i = 0; i < uploadableObjects; i++) - { - const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(i + currentObjectInSection); - - uint64_t hatchBoxAddress; - { - static_assert(sizeof(CurveBox) == sizeof(Hatch::CurveHatchBox)); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dst, &hatchBox, sizeof(CurveBox)); - hatchBoxAddress = drawResourcesBDA + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(CurveBox); - } + if (objectsToUpload <= 0u) + return; - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::CURVE_BOX) | (0 << 16)); - drawObj.mainObjIndex = mainObjIndex; - 
drawObj.geometryAddress = hatchBoxAddress; - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount + i; - memcpy(dst, &drawObj, sizeof(DrawObject)); + // Add Geometry + static_assert(sizeof(CurveBox) == sizeof(Hatch::CurveHatchBox)); + const auto curveBoxesByteSize = sizeof(Hatch::CurveHatchBox) * objectsToUpload; + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(curveBoxesByteSize, alignof(Hatch::CurveHatchBox)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(currentObjectInSection); // WARNING: This is assuming hatch boxes are contigous in memory, TODO: maybe make that more obvious through Hatch interface + memcpy(dst, &hatchBox, curveBoxesByteSize); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIndex; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::CURVE_BOX) | (0 << 16)); + drawObj.geometryAddress = geometryBufferOffset; + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + drawObjectsToBeFilled[i] = drawObj; + drawObj.geometryAddress += sizeof(Hatch::CurveHatchBox); } // Add Indices - 
currentDrawObjectCount += uploadableObjects; currentObjectInSection += uploadableObjects; -#endif } -// TODO: FIX bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx) { -#if 0 const uint32_t maxGeometryBufferFontGlyphs = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(GlyphInfo)); uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; @@ -994,11 +998,8 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint return true; } - else - { - return false; - } -#endif + + return false; } void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 85d88f2eb..b329f4d01 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -196,6 +196,7 @@ struct DrawResourcesFiller { resetDrawObjects(); resetMainObjects(); + resetCustomClipProjections(); resetLineStyles(); resetDTMSettings(); } From 692df5ff80f72d8d99bef5d5efa3e4813a8233f3 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 04:54:10 +0330 Subject: [PATCH 025/129] fix glyph and image auto-submission logic --- 62_CAD/DrawResourcesFiller.cpp | 114 ++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 53 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 45abd8555..8892f93c8 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -119,8 +119,6 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); } - return; // TODO: Remove - if (!polyline.getConnectors().empty()) { uint32_t currentConnectorPolylineObject = 0u; @@ -200,7 +198,6 @@ void DrawResourcesFiller::drawHatch( const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) { - return; // TODO: Remove if 
(color.a == 0.0f) // not visible return; @@ -234,7 +231,6 @@ void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); } -// TODO: FIX void DrawResourcesFiller::drawFontGlyph( nbl::ext::TextRendering::FontFace* fontFace, uint32_t glyphIdx, @@ -245,7 +241,6 @@ void DrawResourcesFiller::drawFontGlyph( uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) { -#if 0 uint32_t textureIdx = InvalidTextureIdx; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); textureIdx = getMSDFIndexFromInputInfo(msdfInput, intendedNextSubmit); @@ -268,39 +263,45 @@ void DrawResourcesFiller::drawFontGlyph( // TODO: Log, probably getGlyphMSDF(face,glyphIdx) returned nullptr ICPUImage ptr _NBL_DEBUG_BREAK_IF(true); } -#endif } -// TODO: FIX void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) { -#if 0 auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool { - const uint32_t maxGeometryBufferImageObjects = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ImageObjectInfo)); - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferImageObjects); - - if (uploadableObjects >= 1u) - { - void* dstGeom = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dstGeom, &imageObjectInfo, sizeof(ImageObjectInfo)); - uint64_t geomBufferAddr = drawResourcesBDA + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(ImageObjectInfo); - - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack 
function - drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = geomBufferAddr; - void* dstDrawObj = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dstDrawObj, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; - - return true; - } - else + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(ImageObjectInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + + if (uploadableObjects <= 0u) return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(ImageObjectInfo), alignof(ImageObjectInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &imageObjectInfo, sizeof(ImageObjectInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return 
true; }; uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(InvalidStyleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); @@ -317,7 +318,6 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 bool success = addImageObject_Internal(info, mainObjIdx); assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object } -#endif } bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) @@ -975,31 +975,39 @@ void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& curren bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx) { - const uint32_t maxGeometryBufferFontGlyphs = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(GlyphInfo)); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GlyphInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferFontGlyphs); + if (uploadableObjects <= 0u) + return false; - if (uploadableObjects >= 1u) - { - void* geomDst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(geomDst, &glyphInfo, sizeof(GlyphInfo)); - uint64_t fontGlyphAddr = drawResourcesBDA + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(GlyphInfo); - - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::FONT_GLYPH) | (0 << 16)); - drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = 
fontGlyphAddr; - void* drawObjDst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(drawObjDst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GlyphInfo), alignof(GlyphInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &glyphInfo, sizeof(GlyphInfo)); - return true; - } + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; - return false; + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::FONT_GLYPH) | (0 << 16)); + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; } void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) From cf05dcad580343633ca1fb72b0232753ad3a299f Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 04:59:01 +0330 Subject: [PATCH 026/129] Fix ClipProjectionIndices reset --- 62_CAD/DrawResourcesFiller.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index b329f4d01..7ef3e2020 100644 --- a/62_CAD/DrawResourcesFiller.h +++ 
b/62_CAD/DrawResourcesFiller.h @@ -300,6 +300,10 @@ struct DrawResourcesFiller void resetCustomClipProjections() { resourcesCollection.clipProjections.vector.clear(); + + // Invalidate all the clip projection addresses because clipProjections buffer got reset + for (auto& clipProjAddr : clipProjectionIndices) + clipProjAddr = InvalidClipProjectionIndex; } void resetLineStyles() From 3304cde640bbe189e9e579ef1af33602b19277b5 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 14:18:02 +0330 Subject: [PATCH 027/129] auto-submission more mature, corrects resource references on the go by not storing any index but address to active + functions to begin/endMainObject and setActiveLineStyle --- 62_CAD/DrawResourcesFiller.cpp | 294 ++++++++++++++++++--------------- 62_CAD/DrawResourcesFiller.h | 116 ++++++++----- 62_CAD/SingleLineText.cpp | 7 +- 62_CAD/main.cpp | 11 +- 62_CAD/shaders/globals.hlsl | 12 +- 5 files changed, 254 insertions(+), 186 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 8892f93c8..8ad13cb97 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -19,6 +19,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, { size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); + size = 512u; IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; @@ -84,16 +85,17 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line if (!lineStyleInfo.isVisible()) return; - uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyleInfo, intendedNextSubmit); - - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); - - 
drawPolyline(polyline, mainObjIdx, intendedNextSubmit); + setActiveLineStyle(lineStyleInfo); + + beginMainObject(MainObjectType::POLYLINE); + drawPolyline(polyline, intendedNextSubmit); + endMainObject(); } -void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit) { - if (polylineMainObjIdx == InvalidMainObjectIdx) + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) { // TODO: assert or log error here assert(false); @@ -108,7 +110,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p while (currentSectionIdx < sectionsCount) { const auto& currentSection = polyline.getSectionInfoAt(currentSectionIdx); - addPolylineObjects_Internal(polyline, currentSection, currentObjectInSection, polylineMainObjIdx); + addPolylineObjects_Internal(polyline, currentSection, currentObjectInSection, mainObjectIdx); if (currentObjectInSection >= currentSection.count) { @@ -116,7 +118,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p currentObjectInSection = 0u; } else - submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } if (!polyline.getConnectors().empty()) @@ -124,16 +126,20 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p uint32_t currentConnectorPolylineObject = 0u; while (currentConnectorPolylineObject < polyline.getConnectors().size()) { - addPolylineConnectors_Internal(polyline, currentConnectorPolylineObject, polylineMainObjIdx); + addPolylineConnectors_Internal(polyline, currentConnectorPolylineObject, mainObjectIdx); if (currentConnectorPolylineObject < polyline.getConnectors().size()) - 
submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } } } void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { + setActiveDTMSettings(dtmSettingsInfo); + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + drawData.pushConstants.triangleMeshMainObjectIndex = mainObjectIdx; + ICPUBuffer::SCreationParams geometryBuffParams; // concatenate the index and vertex buffer into the geometry buffer @@ -166,12 +172,6 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM } drawData.indexCount = mesh.getIndexCount(); - - // call addMainObject_SubmitIfNeeded, use its index in push constants - - uint32_t dtmSettingsIndex = addDTMSettings_SubmitIfNeeded(dtmSettingsInfo, intendedNextSubmit); - - drawData.pushConstants.triangleMeshMainObjectIndex = addMainObject_SubmitIfNeeded(InvalidStyleIdx, dtmSettingsIndex, intendedNextSubmit); } // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor @@ -207,23 +207,27 @@ void DrawResourcesFiller::drawHatch( MSDFInputInfo msdfInfo = MSDFInputInfo(fillPattern); textureIdx = getMSDFIndexFromInputInfo(msdfInfo, intendedNextSubmit); if (textureIdx == InvalidTextureIdx) - textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), InvalidMainObjectIdx, intendedNextSubmit); + textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), intendedNextSubmit); _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIdx); // probably getHatchFillPatternMSDF returned nullptr } LineStyleInfo lineStyle = {}; lineStyle.color = color; lineStyle.screenSpaceLineWidth = nbl::hlsl::bit_cast(textureIdx); - const uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - - uint32_t mainObjIdx 
= addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); - uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject used in vertex shader. You can think of it as a Cage. + + setActiveLineStyle(lineStyle); + beginMainObject(MainObjectType::HATCH); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject. You can think of it as a Cage. while (currentObjectInSection < hatch.getHatchBoxCount()) { - addHatch_Internal(hatch, currentObjectInSection, mainObjIdx); + addHatch_Internal(hatch, currentObjectInSection, mainObjectIdx); if (currentObjectInSection < hatch.getHatchBoxCount()) - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } + + endMainObject(); } void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit) @@ -238,14 +242,16 @@ void DrawResourcesFiller::drawFontGlyph( float32_t2 dirU, float32_t aspectRatio, float32_t2 minUV, - uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) { uint32_t textureIdx = InvalidTextureIdx; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); textureIdx = getMSDFIndexFromInputInfo(msdfInput, intendedNextSubmit); if (textureIdx == InvalidTextureIdx) - textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), mainObjIdx, intendedNextSubmit); + textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), intendedNextSubmit); + + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + assert(mainObjIdx != InvalidMainObjectIdx); if (textureIdx != InvalidTextureIdx) { @@ -304,7 +310,9 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 return true; }; - uint32_t mainObjIdx = 
addMainObject_SubmitIfNeeded(InvalidStyleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); + beginMainObject(MainObjectType::IMAGE); + + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); ImageObjectInfo info = {}; info.topLeft = topLeftPos; @@ -318,6 +326,8 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 bool success = addImageObject_Internal(info, mainObjIdx); assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object } + + endMainObject(); } bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) @@ -328,94 +338,41 @@ bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNe return success; } -uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) { - const size_t remainingResourcesSize = calculateRemainingResourcesSize(); - const bool enoughMem = remainingResourcesSize >= sizeof(LineStyle); // enough remaining memory for 1 more linestyle? 
- - uint32_t outLineStyleIdx = addLineStyle_Internal(lineStyle); - if (outLineStyleIdx == InvalidStyleIdx) - { - // There wasn't enough resource memory remaining to fit a single LineStyle - finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); - - // resets itself - resetLineStyles(); - // resets higher level resources - resetMainObjects(); - resetDrawObjects(); - - outLineStyleIdx = addLineStyle_Internal(lineStyle); - assert(outLineStyleIdx != InvalidStyleIdx); - } - - return outLineStyleIdx; + activeLineStyle = lineStyle; } -uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettings) { - // before calling `addDTMSettings_Internal` we have made sute we have enough mem for - uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); - if (outDTMSettingIdx == InvalidDTMSettingsIdx) - { - // There wasn't enough resource memory remaining to fit dtmsettings struct + 2 linestyles structs. 
- finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); - - // resets itself - resetDTMSettings(); - resetLineStyles(); // additionally resets linestyles as well, just to be safe - // resets higher level resources - resetMainObjects(); - resetDrawObjects(); - - outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); - assert(outDTMSettingIdx != InvalidDTMSettingsIdx); - } - return outDTMSettingIdx; + activeDTMSettings = dtmSettings; } -uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, uint32_t dtmSettingsIdx, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::beginMainObject(MainObjectType type) { - MainObject mainObject = {}; - mainObject.styleIdx = styleIdx; - mainObject.dtmSettingsIdx = dtmSettingsIdx; - mainObject.clipProjectionIndex = acquireCurrentClipProjectionIndex(intendedNextSubmit); - uint32_t outMainObjectIdx = addMainObject_Internal(mainObject); - if (outMainObjectIdx == InvalidMainObjectIdx) - { - // failed to fit into remaining resources mem or exceeded max indexable mainobj - finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); - - // resets itself - resetMainObjects(); - // resets higher level resources - resetDrawObjects(); - // we shouldn't reset lower level resources like linestyles and clip projections here because it was possibly requested to push to mem before addMainObjects + activeMainObjectType = type; + activeMainObjectIndex = InvalidMainObjectIdx; +} - // try to add again - outMainObjectIdx = addMainObject_Internal(mainObject); - assert(outMainObjectIdx != InvalidMainObjectIdx); - } - - return outMainObjectIdx; +void DrawResourcesFiller::endMainObject() +{ + activeMainObjectType = MainObjectType::NONE; + activeMainObjectIndex = InvalidMainObjectIdx; } void DrawResourcesFiller::pushClipProjectionData(const ClipProjectionData& clipProjectionData) { - clipProjections.push_back(clipProjectionData); - 
clipProjectionIndices.push_back(InvalidClipProjectionIndex); + activeClipProjections.push_back(clipProjectionData); + activeClipProjectionIndices.push_back(InvalidClipProjectionIndex); } void DrawResourcesFiller::popClipProjectionData() { - if (clipProjections.empty()) + if (activeClipProjections.empty()) return; - clipProjections.pop_back(); - clipProjectionIndices.pop_back(); + activeClipProjections.pop_back(); + activeClipProjectionIndices.pop_back(); } bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit) @@ -626,27 +583,12 @@ const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const return resourcesGPUBuffer->getSize() - resourcesCollection.calculateTotalConsumption(); } -void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex) +void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t& mainObjectIndex) { finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - - // We reset Geometry Counters (drawObj+geometryInfos) because we're done rendering previous geometry - // We don't reset counters for styles because we will be reusing them - resetDrawObjects(); -} - -uint32_t DrawResourcesFiller::addMainObject_Internal(const MainObject& mainObject) -{ - const size_t remainingResourcesSize = calculateRemainingResourcesSize(); - const size_t memRequired = sizeof(MainObject); - const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? 
- if (!enoughMem) - return InvalidMainObjectIdx; - if (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects) - return InvalidMainObjectIdx; - resourcesCollection.mainObjects.vector.push_back(mainObject); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers - return resourcesCollection.mainObjects.vector.size() - 1u; + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + mainObjectIndex = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); } uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineStyleInfo) @@ -667,8 +609,7 @@ uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineSty return i; } - resourcesCollection.lineStyles.vector.push_back(gpuLineStyle); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers - return resourcesCollection.lineStyles.vector.size() - 1u; + return resourcesCollection.lineStyles.addAndGetOffset(gpuLineStyle); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers } uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) @@ -710,37 +651,125 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm return i; } - resourcesCollection.dtmSettings.vector.push_back(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers - return resourcesCollection.dtmSettings.vector.size() - 1u; + resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers +} + +uint32_t 
DrawResourcesFiller::acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeLineStyleIndex == InvalidStyleIdx) + activeLineStyleIndex = addLineStyle_SubmitIfNeeded(activeLineStyle, intendedNextSubmit); + + return activeLineStyleIndex; } -uint32_t DrawResourcesFiller::acquireCurrentClipProjectionIndex(SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) { - if (clipProjectionIndices.empty()) + if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) + activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit); + + return activeDTMSettingsIndex; +} + +uint32_t DrawResourcesFiller::acquireActiveClipProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeClipProjectionIndices.empty()) return InvalidClipProjectionIndex; - if (clipProjectionIndices.back() == InvalidClipProjectionIndex) - clipProjectionIndices.back() = addClipProjectionData_SubmitIfNeeded(clipProjections.back(), intendedNextSubmit); + if (activeClipProjectionIndices.back() == InvalidClipProjectionIndex) + activeClipProjectionIndices.back() = addClipProjectionData_SubmitIfNeeded(activeClipProjections.back(), intendedNextSubmit); - return clipProjectionIndices.back(); + return activeClipProjectionIndices.back(); } -uint32_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) { + if (activeMainObjectIndex != InvalidMainObjectIdx) + return activeMainObjectIndex; + if (activeMainObjectType == MainObjectType::NONE) + { + assert(false); // You're probably trying to acquire mainObjectIndex outside of startMainObject, endMainObject scope + return InvalidMainObjectIdx; + } + const size_t 
remainingResourcesSize = calculateRemainingResourcesSize(); - const size_t memRequired = sizeof(ClipProjectionData); + // making sure MainObject and everything it references fits into remaining resources mem + size_t memRequired = sizeof(MainObject); + memRequired += ((activeMainObjectType == MainObjectType::DTM) ? sizeof(DTMSettings) : sizeof(LineStyle)); // needing LineStyle or DTMSettings depends on mainObject type + memRequired += (activeClipProjectionIndices.empty()) ? 0u : sizeof(ClipProjectionData); // if there is custom clip projections, account for it + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? + const bool needToOverflowSubmit = (!enoughMem) || (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects); + + if (needToOverflowSubmit) + { + // failed to fit into remaining resources mem or exceeded max indexable mainobj + finalizeAllCopiesToGPU(intendedNextSubmit); + submitDraws(intendedNextSubmit); + reset(); // resets everything! be careful! + } + + MainObject mainObject = {}; + // These 3 calls below shouldn't need to Submit because we made sure there is enough memory for all of them. + // if something here triggers a auto-submit it's a possible bug, TODO: assert that somehow? + mainObject.styleIdx = (activeMainObjectType == MainObjectType::DTM) ? InvalidStyleIdx : acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit); // only call if it requirees dtm + mainObject.dtmSettingsIdx = (activeMainObjectType == MainObjectType::DTM) ? 
acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; // only call if it requirees dtm + mainObject.clipProjectionIndex = acquireActiveClipProjectionIndex_SubmitIfNeeded(intendedNextSubmit); + activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); + return activeMainObjectIndex; +} - if (!enoughMem) +uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t outLineStyleIdx = addLineStyle_Internal(lineStyle); + if (outLineStyleIdx == InvalidStyleIdx) { + // There wasn't enough resource memory remaining to fit a single LineStyle finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); // resets itself - resetCustomClipProjections(); + resetLineStyles(); // resets higher level resources resetMainObjects(); resetDrawObjects(); + + outLineStyleIdx = addLineStyle_Internal(lineStyle); + assert(outLineStyleIdx != InvalidStyleIdx); + } + + return outLineStyleIdx; +} + +uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit) +{ + // before calling `addDTMSettings_Internal` we have made sute we have enough mem for + uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + if (outDTMSettingIdx == InvalidDTMSettingsIdx) + { + // There wasn't enough resource memory remaining to fit dtmsettings struct + 2 linestyles structs. + finalizeAllCopiesToGPU(intendedNextSubmit); + submitDraws(intendedNextSubmit); + // resets everything! be careful! 
+ reset(); + + outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + assert(outDTMSettingIdx != InvalidDTMSettingsIdx); + } + return outDTMSettingIdx; +} + +uint32_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t memRequired = sizeof(ClipProjectionData); + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? + + if (!enoughMem) + { + finalizeAllCopiesToGPU(intendedNextSubmit); + submitDraws(intendedNextSubmit); + // resets everything! be careful! + reset(); } resourcesCollection.clipProjections.vector.push_back(clipProjectionData); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers @@ -1020,7 +1049,7 @@ void DrawResourcesFiller::setHatchFillMSDFTextureFunction(const GetHatchFillPatt getHatchFillPatternMSDF = func; } -uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { if (!cpuImage) return InvalidTextureIdx; // TODO: Log @@ -1041,10 +1070,9 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor { // Dealloc once submission is finished msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, nextSemaSignal); - - // If we reset main objects will cause an auto submission bug, where adding an msdf texture while constructing glyphs will have wrong main object references (See how SingleLineTexts add Glyphs with a single mainObject) - // for the same reason we don't reset line styles - 
submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + finalizeAllCopiesToGPU(intendedNextSubmit); + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded } else { diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 7ef3e2020..60e7c923c 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -78,6 +78,12 @@ struct DrawResourcesFiller vector.resize(offset + additionalSize); return offset; } + + uint32_t addAndGetOffset(const T& val) + { + vector.push_back(val); + return vector.size() - 1u; + } T* data() { return vector.data(); } }; @@ -148,7 +154,8 @@ struct DrawResourcesFiller //! this function fills buffers required for drawing a polyline and submits a draw through provided callback when there is not enough memory. void drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit); - void drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + /// WARNING: make sure this function is called within begin/endMainObject scope + void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); void drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); @@ -172,8 +179,8 @@ struct DrawResourcesFiller const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit); - - // ! 
Draw Font Glyph, will auto submit if there is no space + + /// WARNING: make sure this function is called within begin/endMainObject scope void drawFontGlyph( nbl::ext::TextRendering::FontFace* fontFace, uint32_t glyphIdx, @@ -181,7 +188,6 @@ struct DrawResourcesFiller float32_t2 dirU, float32_t aspectRatio, float32_t2 minUV, - uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); void _test_addImageObject( @@ -210,24 +216,17 @@ struct DrawResourcesFiller /// @return how far resourcesGPUBuffer was copied to by `finalizeAllCopiesToGPU` in `resourcesCollection` const size_t getCopiedResourcesSize() { return copiedResourcesSize; } - uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); - - uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); - - // TODO[Przemek]: Read after reading the fragment shader comments and having a basic understanding of the relationship between "mainObject" and our programmable blending resolve: - // Use `addMainObject_SubmitIfNeeded` to push your single mainObject you'll be using for the enitre triangle mesh (this will ensure overlaps between triangles of the same mesh is resolved correctly) - // Delete comment when you understand this + // Setting Active Resources: + void setActiveLineStyle(const LineStyleInfo& lineStyle); + void setActiveDTMSettings(const DTMSettingsInfo& dtmSettings); - // [ADVANCED] Do not use this function unless you know what you're doing (It may cause auto submit) - // Never call this function multiple times in a row before indexing it in a drawable, because future auto-submits may invalidate mainObjects, so do them one by one, for example: - // Valid: addMainObject1 --> addXXX(mainObj1) ---> addMainObject2 ---> addXXX(mainObj2) .... - // Invalid: addMainObject1 ---> addMainObject2 ---> addXXX(mainObj1) ---> addXXX(mainObj2) .... 
- uint32_t addMainObject_SubmitIfNeeded(uint32_t styleIdx, uint32_t dtmSettingsIdx, SIntendedSubmitInfo& intendedNextSubmit); + void beginMainObject(MainObjectType type); + void endMainObject(); - // we need to store the clip projection stack to make sure the front is always available in memory void pushClipProjectionData(const ClipProjectionData& clipProjectionData); void popClipProjectionData(); - const std::deque& getClipProjectionStack() const { return clipProjections; } + + const std::deque& getClipProjectionStack() const { return activeClipProjections; } smart_refctd_ptr getMSDFsTextureArray() { return msdfTextureArray; } @@ -255,33 +254,59 @@ struct DrawResourcesFiller const size_t calculateRemainingResourcesSize() const; - // Internal Function to call whenever we overflow when we can't fill all of mainObject's drawObjects - void submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex); - - /// @return index to added main object. - /// It will return `InvalidMainObjectIndex` if it there isn't enough remaining resources memory OR the index would exceed MaxIndexableMainObjects - uint32_t addMainObject_Internal(const MainObject& mainObject); + // TODO: Find better name for function + /// @brief Internal Function to call whenever we overflow when we can't fill all of mainObject's drawObjects + /// @param intendedNextSubmit + /// @param mainObjectIndex: function updates mainObjectIndex after submitting, clearing everything and acquiring mainObjectIndex again. 
+ void submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t& mainObjectIndex); - uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); + // Gets resource index to the active linestyle data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active linestyle data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + // Gets resource index to the active clip projection data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveClipProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active main object data + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - // Gets the current clip projection data (the top of stack) gpu addreess inside the geometryBuffer - // If it's been invalidated then it will request to upload again with a possible auto-submit on low geometry buffer memory. - uint32_t acquireCurrentClipProjectionIndex(SIntendedSubmitInfo& intendedNextSubmit); + /// Attempts to add lineStyle to resources. 
If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add dtmSettings to resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + /// Attempts to add clipProjection to resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit); - + + /// returns index to added LineStyleInfo, returns Invalid index if it exceeds resource limitations + uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); + + /// returns index to added DTMSettingsInfo, returns Invalid index if it exceeds resource limitations + uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the given polyline connectors considering resource limitations void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addLines_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the 
given polyline section considering resource limitations void addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the given hatch considering resource limitations void addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex); + /// Attempts to upload a single GlyphInfo considering resource limitations bool addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); void resetMainObjects() @@ -301,19 +326,21 @@ struct DrawResourcesFiller { resourcesCollection.clipProjections.vector.clear(); - // Invalidate all the clip projection addresses because clipProjections buffer got reset - for (auto& clipProjAddr : clipProjectionIndices) + // Invalidate all the clip projection addresses because activeClipProjections buffer got reset + for (auto& clipProjAddr : activeClipProjectionIndices) clipProjAddr = InvalidClipProjectionIndex; } void resetLineStyles() { resourcesCollection.lineStyles.vector.clear(); + activeLineStyleIndex = InvalidStyleIdx; } void resetDTMSettings() { resourcesCollection.dtmSettings.vector.clear(); + activeDTMSettingsIndex = InvalidDTMSettingsIdx; } // MSDF Hashing and Caching Internal Functions @@ -404,7 +431,7 @@ struct DrawResourcesFiller // ! mainObjIdx: make sure to pass your mainObjIdx to it if you want it to stay synced/updated if some overflow submit occured which would potentially erase what your mainObject points at. 
// If you haven't created a mainObject yet, then pass InvalidMainObjectIdx - uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); // ResourcesCollection and packed into GPUBuffer ResourcesCollection resourcesCollection; @@ -415,8 +442,19 @@ struct DrawResourcesFiller smart_refctd_ptr m_utilities; IQueue* m_copyQueue; - std::deque clipProjections; // stack of clip projectios stored so we can resubmit them if geometry buffer got reset. - std::deque clipProjectionIndices; // stack of clip projection gpu addresses in geometry buffer. to keep track of them in push/pops + // Active Resources we need to keep track of and push to resources buffer if needed. + LineStyleInfo activeLineStyle; + uint32_t activeLineStyleIndex = InvalidStyleIdx; + + DTMSettingsInfo activeDTMSettings; + uint32_t activeDTMSettingsIndex = InvalidDTMSettingsIdx; + + MainObjectType activeMainObjectType; + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + + // The ClipProjections are stack, because user can push/pop ClipProjections in any order + std::deque activeClipProjections; // stack of clip projections stored so we can resubmit them if geometry buffer got reset. + std::deque activeClipProjectionIndices; // stack of clip projection gpu addresses in geometry buffer. 
to keep track of them in push/pops // MSDF GetGlyphMSDFTextureFunc getGlyphMSDF; diff --git a/62_CAD/SingleLineText.cpp b/62_CAD/SingleLineText.cpp index f68f78db3..ea755a2df 100644 --- a/62_CAD/SingleLineText.cpp +++ b/62_CAD/SingleLineText.cpp @@ -63,8 +63,8 @@ void SingleLineText::Draw( lineStyle.color = color; lineStyle.screenSpaceLineWidth = tan(tiltTiltAngle); lineStyle.worldSpaceLineWidth = boldInPixels; - const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - auto glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); + drawResourcesFiller.setActiveLineStyle(lineStyle); + drawResourcesFiller.beginMainObject(MainObjectType::TEXT); for (const auto& glyphBox : m_glyphBoxes) { @@ -75,7 +75,8 @@ void SingleLineText::Draw( // float32_t3 xx = float64_t3(0.0, -glyphBox.size.y, 0.0); const float32_t aspectRatio = static_cast(glm::length(dirV) / glm::length(dirU)); // check if you can just do: (glyphBox.size.y * scale.y) / glyphBox.size.x * scale.x) const float32_t2 minUV = face->getUV(float32_t2(0.0f,0.0f), glyphBox.size, drawResourcesFiller.getMSDFResolution(), MSDFPixelRange); - drawResourcesFiller.drawFontGlyph(face, glyphBox.glyphIdx, topLeft, dirU, aspectRatio, minUV, glyphObjectIdx, intendedNextSubmit); + drawResourcesFiller.drawFontGlyph(face, glyphBox.glyphIdx, topLeft, dirU, aspectRatio, minUV, intendedNextSubmit); } + drawResourcesFiller.endMainObject(); } \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 5f9e88694..c11108599 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -75,7 +75,7 @@ constexpr std::array cameraExtents = 600.0 // CASE_9 }; -constexpr ExampleMode mode = ExampleMode::CASE_4; +constexpr ExampleMode mode = ExampleMode::CASE_6; class Camera2D { @@ -3022,15 +3022,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto penY = -500.0; auto previous = 0; - 
uint32_t glyphObjectIdx; - { - LineStyleInfo lineStyle = {}; - lineStyle.color = float32_t4(1.0, 1.0, 1.0, 1.0); - const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - - glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, InvalidDTMSettingsIdx, intendedNextSubmit); - } - float64_t2 currentBaselineStart = float64_t2(0.0, 0.0); float64_t scale = 1.0 / 64.0; diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 562f523e6..b7686684d 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -113,6 +113,16 @@ pfloat64_t2 transformVectorNdc(NBL_CONST_REF_ARG(pfloat64_t3x3) transformation, } #endif +enum class MainObjectType : uint32_t +{ + NONE = 0u, + POLYLINE, + HATCH, + TEXT, + IMAGE, + DTM, +}; + enum class ObjectType : uint32_t { LINE = 0u, @@ -282,7 +292,7 @@ NBL_CONSTEXPR uint32_t InvalidRigidSegmentIndex = 0xffffffff; NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits::infinity; -// TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like LineStyle stipple patterns) and requires upper_bound to figure out the color based on height value. +// TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like stipple patterns) and requires upper_bound to figure out the color based on height value. // We'll discuss that later or what it will be looking like and how it's gonna get passed to our shaders. 
struct TriangleMeshVertex From e7b63ee06e9f002e55eb4654156263b013f429c1 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 14:20:03 +0330 Subject: [PATCH 028/129] small fix --- 62_CAD/DrawResourcesFiller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 8ad13cb97..f2d850cd4 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -711,7 +711,7 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten MainObject mainObject = {}; // These 3 calls below shouldn't need to Submit because we made sure there is enough memory for all of them. // if something here triggers a auto-submit it's a possible bug, TODO: assert that somehow? - mainObject.styleIdx = (activeMainObjectType == MainObjectType::DTM) ? InvalidStyleIdx : acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit); // only call if it requirees dtm + mainObject.styleIdx = (activeMainObjectType == MainObjectType::DTM) ? InvalidStyleIdx : acquireActiveLineStyleIndex_SubmitIfNeeded(intendedNextSubmit); // only call if it requirees dtm mainObject.dtmSettingsIdx = (activeMainObjectType == MainObjectType::DTM) ? 
acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; // only call if it requirees dtm mainObject.clipProjectionIndex = acquireActiveClipProjectionIndex_SubmitIfNeeded(intendedNextSubmit); activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); From 468dab1e9d25d186be08a79af333ff286da6257c Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 14:21:49 +0330 Subject: [PATCH 029/129] another small fix --- 62_CAD/DrawResourcesFiller.h | 1 + 1 file changed, 1 insertion(+) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 60e7c923c..e88b032cd 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -312,6 +312,7 @@ struct DrawResourcesFiller void resetMainObjects() { resourcesCollection.mainObjects.vector.clear(); + activeMainObjectIndex = InvalidMainObjectIdx; } // these resources are data related to chunks of a whole mainObject From bb793be88e46143ce29065e720e37f99306b4795 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 14:29:33 +0330 Subject: [PATCH 030/129] [temp] lower mem to test auto-submit --- 62_CAD/DrawResourcesFiller.cpp | 2 +- 62_CAD/main.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index f2d850cd4..23b03f97f 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -19,7 +19,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, { size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); - size = 512u; + size = 368u; IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; diff --git a/62_CAD/main.cpp 
b/62_CAD/main.cpp index c11108599..cd673278c 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -75,7 +75,7 @@ constexpr std::array cameraExtents = 600.0 // CASE_9 }; -constexpr ExampleMode mode = ExampleMode::CASE_6; +constexpr ExampleMode mode = ExampleMode::CASE_8; class Camera2D { From bcb5fa3e0dc9d05fc73efdd6dede89069b4bb3e9 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 15:39:18 +0330 Subject: [PATCH 031/129] fixed linestyle index fetching --- 62_CAD/DrawResourcesFiller.cpp | 2 ++ 62_CAD/DrawResourcesFiller.h | 1 + 2 files changed, 3 insertions(+) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 23b03f97f..ce267fb92 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -341,11 +341,13 @@ bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNe void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) { activeLineStyle = lineStyle; + activeLineStyleIndex = InvalidStyleIdx; } void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettings) { activeDTMSettings = dtmSettings; + activeDTMSettingsIndex = InvalidDTMSettingsIdx; } void DrawResourcesFiller::beginMainObject(MainObjectType type) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index e88b032cd..d6e3c7968 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -198,6 +198,7 @@ struct DrawResourcesFiller bool finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief resets resources buffers void reset() { resetDrawObjects(); From da76699d9356589008fa5190d3feeffd1d2c0a68 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 15:57:15 +0330 Subject: [PATCH 032/129] fixed acquireMainObjectIndex --- 62_CAD/DrawResourcesFiller.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 
ce267fb92..3c808478d 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -19,7 +19,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, { size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); - size = 368u; + // size = 368u; IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; @@ -693,11 +693,19 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten return InvalidMainObjectIdx; } + const bool needsLineStyle = + (activeMainObjectType == MainObjectType::POLYLINE) || + (activeMainObjectType == MainObjectType::HATCH) || + (activeMainObjectType == MainObjectType::TEXT); + const bool needsDTMSettings = (activeMainObjectType == MainObjectType::DTM); + const bool needsCustomClipProjection = (!activeClipProjectionIndices.empty()); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); // making sure MainObject and everything it references fits into remaining resources mem size_t memRequired = sizeof(MainObject); - memRequired += ((activeMainObjectType == MainObjectType::DTM) ? sizeof(DTMSettings) : sizeof(LineStyle)); // needing LineStyle or DTMSettings depends on mainObject type - memRequired += (activeClipProjectionIndices.empty()) ? 0u : sizeof(ClipProjectionData); // if there is custom clip projections, account for it + if (needsLineStyle) memRequired += sizeof(LineStyle); + if (needsDTMSettings) memRequired += sizeof(DTMSettings); + if (needsCustomClipProjection) memRequired += sizeof(ClipProjectionData); const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? 
const bool needToOverflowSubmit = (!enoughMem) || (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects); @@ -713,9 +721,9 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten MainObject mainObject = {}; // These 3 calls below shouldn't need to Submit because we made sure there is enough memory for all of them. // if something here triggers a auto-submit it's a possible bug, TODO: assert that somehow? - mainObject.styleIdx = (activeMainObjectType == MainObjectType::DTM) ? InvalidStyleIdx : acquireActiveLineStyleIndex_SubmitIfNeeded(intendedNextSubmit); // only call if it requirees dtm - mainObject.dtmSettingsIdx = (activeMainObjectType == MainObjectType::DTM) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; // only call if it requirees dtm - mainObject.clipProjectionIndex = acquireActiveClipProjectionIndex_SubmitIfNeeded(intendedNextSubmit); + mainObject.styleIdx = (needsLineStyle) ? acquireActiveLineStyleIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidStyleIdx; + mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; + mainObject.clipProjectionIndex = (needsCustomClipProjection) ? 
acquireActiveClipProjectionIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidClipProjectionIndex; activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); return activeMainObjectIndex; } From ed920b0dfa561146782c6434ebacc9afa2644843 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 2 Apr 2025 16:15:50 +0330 Subject: [PATCH 033/129] Fix DTM Rendering --- 62_CAD/DrawResourcesFiller.cpp | 5 ++++- 62_CAD/main.cpp | 3 +-- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 5 +++-- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 3c808478d..09651eef2 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -137,6 +137,8 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedS void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { setActiveDTMSettings(dtmSettingsInfo); + beginMainObject(MainObjectType::DTM); + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); drawData.pushConstants.triangleMeshMainObjectIndex = mainObjectIdx; @@ -172,6 +174,7 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM } drawData.indexCount = mesh.getIndexCount(); + endMainObject(); } // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor @@ -653,7 +656,7 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm return i; } - resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase 
total resource consumption and reduce remaining size --> no need for mem size trackers } uint32_t DrawResourcesFiller::acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index cd673278c..4797aa281 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -75,7 +75,7 @@ constexpr std::array cameraExtents = 600.0 // CASE_9 }; -constexpr ExampleMode mode = ExampleMode::CASE_8; +constexpr ExampleMode mode = ExampleMode::CASE_9; class Camera2D { @@ -1429,7 +1429,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu void addObjects(SIntendedSubmitInfo& intendedNextSubmit) { - // TODO[Przemek]: add your own case, you won't call any other drawResourcesFiller function, only drawMesh with your custom made Mesh (for start it can be a single triangle) // we record upload of our objects and if we failed to allocate we submit everything diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 4852d0522..3e583600f 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -425,14 +425,15 @@ float4 fragMain(PSInput input) : SV_TARGET ObjectType objType = input.getObjType(); const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); const MainObject mainObj = loadMainObject(currentMainObjectIdx); - + +#define DTM #ifdef DTM // TRIANGLE RENDERING { const float outlineThickness = input.getOutlineThickness(); const float contourThickness = input.getContourLineThickness(); const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float stretch = 1.0f; // TODO: figure out what is it for + const float stretch = 1.0f; // TODO: figure out what is it for ---> [ERFAN's REPLY: no need to give shit about this in dtms, it's for special shape styles] const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); DTMSettings dtm = 
loadDTMSettings(mainObj.dtmSettingsIdx); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index b62cbe543..a808a459d 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -84,7 +84,8 @@ PSInput main(uint vertexID : SV_VertexID) // your programmable pulling will use the baseVertexBufferAddress BDA address and `vertexID` to RawBufferLoad it's vertex. // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. - + +#define DTM #ifdef DTM PSInput outV; @@ -119,7 +120,7 @@ PSInput main(uint vertexID : SV_VertexID) ); // TODO: line style of contour line has to be set too! 
- DTMSettings dtm = dtmSettings[mainObj.dtmSettingsIdx]; + DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); From 9d23afd6ee1388459ec5793d6ddbb790b63ee6de Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 3 Apr 2025 09:53:08 +0330 Subject: [PATCH 034/129] Fixed auto-submission bug with self-blending in a beautiful and simple way :) --- 62_CAD/DrawResourcesFiller.cpp | 4 +- 62_CAD/DrawResourcesFiller.h | 4 +- 62_CAD/main.cpp | 17 ++++---- 62_CAD/shaders/globals.hlsl | 5 ++- .../main_pipeline/fragment_shader.hlsl | 2 +- .../shaders/main_pipeline/resolve_alphas.hlsl | 42 +++++++++++++------ .../shaders/main_pipeline/vertex_shader.hlsl | 2 +- 7 files changed, 49 insertions(+), 27 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 09651eef2..ee9aca985 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -19,7 +19,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, { size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); - // size = 368u; + size = 368u; IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; @@ -593,7 +593,7 @@ void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through 
acquireXXX_SubmitIfNeeded - mainObjectIndex = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + mainObjectIndex = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); // it will be 0 because it's first mainObjectIndex after reset and invalidation } uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineStyleInfo) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index d6e3c7968..8ee4ff1b5 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -239,6 +239,9 @@ struct DrawResourcesFiller return msdfTextureArray->getCreationParameters().image->getCreationParameters().mipLevels; } + /// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj + uint32_t getActiveMainObjectIndex() const { return activeMainObjectIndex; } + protected: struct MSDFTextureCopy @@ -255,7 +258,6 @@ struct DrawResourcesFiller const size_t calculateRemainingResourcesSize() const; - // TODO: Find better name for function /// @brief Internal Function to call whenever we overflow when we can't fill all of mainObject's drawObjects /// @param intendedNextSubmit /// @param mainObjectIndex: function updates mainObjectIndex after submitting, clearing everything and acquiring mainObjectIndex again. 
diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 4797aa281..de639f8ba 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -75,7 +75,7 @@ constexpr std::array cameraExtents = 600.0 // CASE_9 }; -constexpr ExampleMode mode = ExampleMode::CASE_9; +constexpr ExampleMode mode = ExampleMode::CASE_4; class Camera2D { @@ -1213,6 +1213,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu globalData.screenToWorldRatio = screenToWorld; globalData.worldToScreenRatio = (1.0/screenToWorld); globalData.miterLimit = 10.0f; + globalData.currentlyActiveMainObjectIndex = drawResourcesFiller.getActiveMainObjectIndex(); SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); assert(updateSuccess); @@ -1883,8 +1884,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu LineStyleInfo style = {}; style.screenSpaceLineWidth = 4.0f; - style.worldSpaceLineWidth = 0.0f; - style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.5f); + style.worldSpaceLineWidth = 2.0f; + style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.1f); LineStyleInfo style2 = {}; style2.screenSpaceLineWidth = 2.0f; @@ -1957,7 +1958,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu myCurve.majorAxis = { -10.0, 5.0 }; myCurve.center = { 0, -5.0 }; myCurve.angleBounds = { - nbl::core::PI() * 2.0, + nbl::core::PI() * 1.0, nbl::core::PI() * 0.0 }; myCurve.eccentricity = 1.0; @@ -1985,10 +1986,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } drawResourcesFiller.drawPolyline(originalPolyline, style, intendedNextSubmit); - //CPolyline offsettedPolyline = originalPolyline.generateParallelPolyline(+0.0 - 3.0 * abs(cos(m_timeElapsed * 0.0009))); - //CPolyline offsettedPolyline2 = originalPolyline.generateParallelPolyline(+0.0 + 3.0 * abs(cos(m_timeElapsed * 0.0009))); - 
//drawResourcesFiller.drawPolyline(offsettedPolyline, style2, intendedNextSubmit); - //drawResourcesFiller.drawPolyline(offsettedPolyline2, style2, intendedNextSubmit); + CPolyline offsettedPolyline = originalPolyline.generateParallelPolyline(+0.0 - 3.0 * abs(cos(10.0 * 0.0009))); + CPolyline offsettedPolyline2 = originalPolyline.generateParallelPolyline(+0.0 + 3.0 * abs(cos(10.0 * 0.0009))); + drawResourcesFiller.drawPolyline(offsettedPolyline, style2, intendedNextSubmit); + drawResourcesFiller.drawPolyline(offsettedPolyline2, style2, intendedNextSubmit); } else if (mode == ExampleMode::CASE_4) { diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index b7686684d..319c30b3d 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -74,8 +74,9 @@ struct Globals pfloat64_t worldToScreenRatio; uint32_t2 resolution; float antiAliasingFactor; - float miterLimit; - float32_t2 _padding; + uint32_t miterLimit; + uint32_t currentlyActiveMainObjectIndex; // for alpha resolve to skip resolving activeMainObjectIdx and prep it for next submit + float32_t _padding; }; #ifndef __HLSL_VERSION static_assert(sizeof(Globals) == 176u); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 3e583600f..4716f0a66 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -426,7 +426,7 @@ float4 fragMain(PSInput input) : SV_TARGET const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); const MainObject mainObj = loadMainObject(currentMainObjectIdx); -#define DTM +//#define DTM #ifdef DTM // TRIANGLE RENDERING { diff --git a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl index c75c86825..987dd7c29 100644 --- a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl +++ b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl @@ -16,26 +16,44 @@ template<> float32_t4 
calculateFinalColor(const uint2 fragCoord) { float32_t4 color; - - nbl::hlsl::spirv::beginInvocationInterlockEXT(); + nbl::hlsl::spirv::beginInvocationInterlockEXT(); + + bool resolve = false; + uint32_t toResolveStyleIdx = InvalidStyleIdx; const uint32_t packedData = pseudoStencil[fragCoord]; const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits); const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits); - pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(0, InvalidMainObjectIdx, AlphaBits, MainObjectIdxBits); - // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate - const bool resolve = storedMainObjectIdx != InvalidMainObjectIdx; - uint32_t toResolveStyleIdx = InvalidStyleIdx; + const bool currentlyActiveMainObj = (storedMainObjectIdx == globals.currentlyActiveMainObjectIndex); + if (!currentlyActiveMainObj) + { + // Normal Scenario, this branch will always be taken if there is no overflow submit in the middle of an active mainObject + //we do the final resolve of the pixel and invalidate the pseudo-stencil + pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(0, InvalidMainObjectIdx, AlphaBits, MainObjectIdxBits); + + // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate + resolve = storedMainObjectIdx != InvalidMainObjectIdx; - // load from colorStorage only if we want to resolve color from texture instead of style - // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store - if (resolve) + // load from colorStorage only if we want to resolve color from texture instead of style + // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store + if (resolve) + { + 
toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx; + if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color + color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); + } + } + else if (globals.currentlyActiveMainObjectIndex != InvalidMainObjectIdx) { - toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx; - if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color - color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); + // Being here means there was an overflow submit in the middle of an active main objejct + // We don't want to resolve the active mainObj, because it needs to fully resolved later when the mainObject actually finishes. + // We change the active main object index in our pseudo-stencil to 0u, because that will be it's new index in the next submit. + uint32_t newMainObjectIdx = 0u; + pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(storedQuantizedAlpha, newMainObjectIdx, AlphaBits, MainObjectIdxBits); + resolve = false; // just to re-iterate that we don't want to resolve this. } + nbl::hlsl::spirv::endInvocationInterlockEXT(); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index a808a459d..d45eac46f 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -85,7 +85,7 @@ PSInput main(uint vertexID : SV_VertexID) // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. 
-#define DTM +//#define DTM #ifdef DTM PSInput outV; From 126afa88f659e40f721a8ffe265703e0c5ea06f7 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 3 Apr 2025 10:14:49 +0330 Subject: [PATCH 035/129] updates to comments regarding auto-submit --- 62_CAD/DrawResourcesFiller.cpp | 26 ++++++++------------------ 62_CAD/DrawResourcesFiller.h | 13 +++++++------ 62_CAD/main.cpp | 2 +- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index ee9aca985..a6d975f5c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -19,7 +19,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, { size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); - size = 368u; + // size = 368u; STRESS TEST IGPUBuffer::SCreationParams geometryCreationParams = {}; geometryCreationParams.size = size; geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; @@ -185,9 +185,7 @@ void DrawResourcesFiller::drawHatch( const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) { - // TODO[Optimization Idea]: don't draw hatch twice if both colors are visible: instead do the msdf inside the alpha resolve by detecting mainObj being a hatch - // https://discord.com/channels/593902898015109131/856835291712716820/1228337893366300743 - // TODO: Come back to this idea when doing color resolve for ecws (they don't have mainObj/style Index, instead they have uv into a texture + // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) // if backgroundColor is visible drawHatch(hatch, backgroundColor, intendedNextSubmit); @@ -602,8 +600,7 @@ uint32_t 
DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineSty const bool enoughMem = remainingResourcesSize >= sizeof(LineStyle); // enough remaining memory for 1 more linestyle? if (!enoughMem) return InvalidStyleIdx; - // TODO: Additionally constraint by a max size? and return InvalidIdx if it would exceed - + // TODO: Maybe constraint by a max size? and return InvalidIdx if it would exceed LineStyle gpuLineStyle = lineStyleInfo.getAsGPUData(); _NBL_DEBUG_BREAK_IF(gpuLineStyle.stipplePatternSize > LineStyle::StipplePatternMaxSize); // Oops, even after style normalization the style is too long to be in gpu mem :( @@ -625,7 +622,7 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm if (!enoughMem) return InvalidDTMSettingsIdx; - // TODO: Additionally constraint by a max size? and return InvalidIdx if it would exceed + // TODO: Maybe constraint by a max size? and return InvalidIdx if it would exceed DTMSettings dtmSettings; dtmSettings.contourLinesStartHeight = dtmSettingsInfo.contourLinesStartHeight; @@ -723,7 +720,7 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten MainObject mainObject = {}; // These 3 calls below shouldn't need to Submit because we made sure there is enough memory for all of them. - // if something here triggers a auto-submit it's a possible bug, TODO: assert that somehow? + // if something here triggers a auto-submit it's a possible bug with calculating `memRequired` above, TODO: assert that somehow? mainObject.styleIdx = (needsLineStyle) ? acquireActiveLineStyleIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidStyleIdx; mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; mainObject.clipProjectionIndex = (needsCustomClipProjection) ? 
acquireActiveClipProjectionIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidClipProjectionIndex; @@ -739,12 +736,7 @@ uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& l // There wasn't enough resource memory remaining to fit a single LineStyle finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - - // resets itself - resetLineStyles(); - // resets higher level resources - resetMainObjects(); - resetDrawObjects(); + reset(); // resets everything! be careful! outLineStyleIdx = addLineStyle_Internal(lineStyle); assert(outLineStyleIdx != InvalidStyleIdx); @@ -762,8 +754,7 @@ uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInf // There wasn't enough resource memory remaining to fit dtmsettings struct + 2 linestyles structs. finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - // resets everything! be careful! - reset(); + reset(); // resets everything! be careful! outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); assert(outDTMSettingIdx != InvalidDTMSettingsIdx); @@ -781,8 +772,7 @@ uint32_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipPro { finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); - // resets everything! be careful! - reset(); + reset(); // resets everything! be careful! 
} resourcesCollection.clipProjections.vector.push_back(clipProjectionData); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 8ee4ff1b5..03482320e 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -67,10 +67,8 @@ struct DrawResourcesFiller } /// @brief increases size of general-purpose resources that hold bytes - /// @param additionalSize /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= ResourcesMaxNaturalAlignment, only use this if storing raw bytes in vector /// @return pointer to start of the data to be filled, up to additional size - // TODO: make sure t is 1 byte with templates. size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) { assert(core::isPoT(alignment) && alignment <= ResourcesMaxNaturalAlignment); @@ -92,7 +90,6 @@ struct DrawResourcesFiller struct ResourcesCollection { // auto-submission level 0 resources (settings that mainObj references) - // Not enough VRAM available to serve adding one of the level 0 resources: they clear themselves and everything from higher levels after doing submission CPUGeneratedResource lineStyles; CPUGeneratedResource dtmSettings; CPUGeneratedResource clipProjections; @@ -102,11 +99,11 @@ struct DrawResourcesFiller // auto-submission level 2 buffers CPUGeneratedResource drawObjects; - CPUGeneratedResource indexBuffer; // this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders - CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom geometries, etc + CPUGeneratedResource indexBuffer; // TODO: this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders + CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom data for geometries (eg. 
line points, bezier definitions, aabbs) // Get Total memory consumption, If all ResourcesCollection get packed together with ResourcesMaxNaturalAlignment - // used to decide when to overflow + // used to decide the remaining memory and when to overflow size_t calculateTotalConsumption() const { return @@ -154,6 +151,7 @@ struct DrawResourcesFiller //! this function fills buffers required for drawing a polyline and submits a draw through provided callback when there is not enough memory. void drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit); + /// Use this in a begin/endMainObject scope when you want to draw different polylines that should essentially be a single main object (no self-blending between components of a single main object) /// WARNING: make sure this function is called within begin/endMainObject scope void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); @@ -180,6 +178,7 @@ struct DrawResourcesFiller const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit); + /// Used by SingleLineText, Issue drawing a font glyph /// WARNING: make sure this function is called within begin/endMainObject scope void drawFontGlyph( nbl::ext::TextRendering::FontFace* fontFace, @@ -196,6 +195,8 @@ struct DrawResourcesFiller float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit); + /// @brief call this function before submitting to ensure all resources are copied + /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. 
bool finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); /// @brief resets resources buffers diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index de639f8ba..791c8fc04 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -75,7 +75,7 @@ constexpr std::array cameraExtents = 600.0 // CASE_9 }; -constexpr ExampleMode mode = ExampleMode::CASE_4; +constexpr ExampleMode mode = ExampleMode::CASE_6; class Camera2D { From 6830c089c9e6ecd7d84e5526a0112f750660b554 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 3 Apr 2025 10:19:52 +0330 Subject: [PATCH 036/129] update TODO, need to handle it, after figuring out compute stages and what vertex buffers and index buffers look like --- 62_CAD/DrawResourcesFiller.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index a6d975f5c..c566de456 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -279,7 +279,7 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 const size_t remainingResourcesSize = calculateRemainingResourcesSize(); const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(ImageObjectInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); - // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + // TODO[ERFAN]: later take into account: our maximum indexable vertex if (uploadableObjects <= 0u) return false; @@ -794,7 +794,7 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po const size_t remainingResourcesSize = calculateRemainingResourcesSize(); const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(PolylineConnector) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); - // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + // TODO[ERFAN]: later take into 
account: our maximum indexable vertex const uint32_t connectorCount = static_cast(polyline.getConnectors().size()); const uint32_t remainingObjects = connectorCount - currentPolylineConnectorObj; @@ -850,7 +850,7 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const // how many lines fit into mem? --> memConsumption = sizeof(LinePointInfo) + sizeof(LinePointInfo)*lineCount + sizeof(DrawObject)*lineCount + sizeof(uint32_t) * 6u * lineCount const uint32_t uploadableObjects = (remainingResourcesSize - sizeof(LinePointInfo)) / (sizeof(LinePointInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); - // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + // TODO[ERFAN]: later take into account: our maximum indexable vertex const uint32_t lineCount = section.count; const uint32_t remainingObjects = lineCount - currentObjectInSection; @@ -904,7 +904,7 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, // how many quad bezier objects fit into mem? 
// memConsumption = quadBezCount * (sizeof(QuadraticBezierInfo) + 3*(sizeof(DrawObject)+6u*sizeof(uint32_t)) const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(QuadraticBezierInfo) + (sizeof(DrawObject) + 6u * sizeof(uint32_t)) * CagesPerQuadBezier); - // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + // TODO[ERFAN]: later take into account: our maximum indexable vertex const uint32_t beziersCount = section.count; const uint32_t remainingObjects = beziersCount - currentObjectInSection; @@ -960,7 +960,7 @@ void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& curren const size_t remainingResourcesSize = calculateRemainingResourcesSize(); const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(Hatch::CurveHatchBox) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); - // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + // TODO[ERFAN]: later take into account: our maximum indexable vertex uint32_t remainingObjects = hatch.getHatchBoxCount() - currentObjectInSection; const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); @@ -1010,7 +1010,7 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint const size_t remainingResourcesSize = calculateRemainingResourcesSize(); const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GlyphInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); - // TODO[ERFAN]: later take into account, our limit of max index buffer and vettex buffer size or constrainst other than mem + // TODO[ERFAN]: later take into account: our maximum indexable vertex if (uploadableObjects <= 0u) return false; From 416d7b3735c8ca313928e70a71fdf606551d1850 Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 4 Apr 2025 12:30:44 +0200 Subject: [PATCH 037/129] Saving work --- 
.../main_pipeline/fragment_shader.hlsl | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 225c0636e..39a9601c1 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -464,9 +464,7 @@ float4 fragMain(PSInput input) : SV_TARGET float minShadingHeight = dtm.heightColorMapHeights[0]; float maxShadingHeight = dtm.heightColorMapHeights[heightMapSize - 1]; - const bool isHeightBetweenMinAndMax = height >= minShadingHeight && height <= maxShadingHeight; - const bool isHeightColorMapNotEmpty = heightMapSize > 0; - if (isHeightColorMapNotEmpty && isHeightBetweenMinAndMax) + if (heightMapSize > 0) { DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode(); @@ -476,23 +474,38 @@ float4 fragMain(PSInput input) : SV_TARGET uint32_t mapIndexPlus1 = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); uint32_t mapIndex = mapIndexPlus1 == 0 ? 
mapIndexPlus1 : mapIndexPlus1 - 1; + float heightDeriv = fwidth(height); + bool blendWithPrev = true + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[mapIndexPlus1] + dtm.heightColorMapHeights[mapIndex])); + // logic explainer: if colorIdx is 0.0 then it means blend with next // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values - bool blendWithPrev = (mapIndex > 0) - && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[mapIndexPlus1] + dtm.heightColorMapHeights[mapIndex])); - float heightDeriv = fwidth(height); if (blendWithPrev) { - float pxDistanceToPrevHeight = (height - dtm.heightColorMapHeights[mapIndex]) / heightDeriv; - float prevColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToPrevHeight); - textureColor = lerp(dtm.heightColorMapColors[mapIndex - 1].rgb, dtm.heightColorMapColors[mapIndex].rgb, prevColorCoverage); + if (mapIndex > 0) + { + float pxDistanceToPrevHeight = (height - dtm.heightColorMapHeights[mapIndex]) / heightDeriv; + float prevColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToPrevHeight); + textureColor = lerp(dtm.heightColorMapColors[mapIndex - 1].rgb, dtm.heightColorMapColors[mapIndex].rgb, prevColorCoverage); + } + else + { + textureColor = dtm.heightColorMapColors[mapIndex].rgb; + } } else { - float pxDistanceToNextHeight = (height - dtm.heightColorMapHeights[mapIndexPlus1]) / heightDeriv; - float nextColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNextHeight); - textureColor = lerp(dtm.heightColorMapColors[mapIndex].rgb, dtm.heightColorMapColors[mapIndexPlus1].rgb, nextColorCoverage); + if (mapIndex < heightMapSize - 1) + { + float pxDistanceToNextHeight = (height - 
dtm.heightColorMapHeights[mapIndexPlus1]) / heightDeriv; + float nextColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNextHeight); + textureColor = lerp(dtm.heightColorMapColors[mapIndex].rgb, dtm.heightColorMapColors[mapIndexPlus1].rgb, nextColorCoverage); + } + else + { + textureColor = dtm.heightColorMapColors[mapIndex].rgb; + } } localAlpha = dtm.heightColorMapColors[mapIndex].a; From 06f72c50cd2b575741faa61ad9624f688817f41c Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 8 Apr 2025 15:50:35 +0200 Subject: [PATCH 038/129] Fixed anti aliasing --- 62_CAD/main.cpp | 19 +++++++- .../main_pipeline/fragment_shader.hlsl | 43 ++++++++++++++++++- .../shaders/main_pipeline/vertex_shader.hlsl | 13 +++++- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 791c8fc04..b49dc56d2 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -75,7 +75,7 @@ constexpr std::array cameraExtents = 600.0 // CASE_9 }; -constexpr ExampleMode mode = ExampleMode::CASE_6; +constexpr ExampleMode mode = ExampleMode::CASE_9; class Camera2D { @@ -3156,6 +3156,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else if (mode == ExampleMode::CASE_9) { + // GRID + /*core::vector vertices = { { float32_t2(-200.0f, -200.0f), 10.0f }, { float32_t2(-50.0f, -200.0f), 50.0f }, @@ -3186,6 +3188,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu 8, 10, 11 };*/ + // PYRAMID + core::vector vertices = { { float32_t2(0.0, 0.0), 100.0 }, { float32_t2(-200.0, -200.0), 10.0 }, @@ -3201,6 +3205,17 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu 0, 4, 1 }; + // SINGLE TRIANGLE + /*core::vector vertices = { + { float32_t2(0.0, 0.0), -20.0 }, + { float32_t2(200.0, 200.0), 100.0 }, + { float32_t2(200.0, -200.0), 80.0 } + }; + + core::vector indices = { + 0, 1, 2 + };*/ + CTriangleMesh mesh; 
mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); @@ -3240,7 +3255,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu //dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 1.0f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 293943e64..5d5d464cc 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -415,6 +415,11 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } +float dot2(in float2 vec) +{ + return dot(vec, vec); +} + [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -426,7 +431,7 @@ float4 fragMain(PSInput input) : SV_TARGET const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); const MainObject mainObj = loadMainObject(currentMainObjectIdx); -//#define DTM +#define DTM #ifdef DTM // TRIANGLE RENDERING { @@ -468,6 +473,35 @@ float4 fragMain(PSInput input) : SV_TARGET if (heightMapSize > 0) { + // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez + float2 e0 = v[1] - v[0]; + float2 e1 = v[2] - v[1]; + float2 e2 = v[0] - v[2]; + + float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); + float2 v0 = input.position.xy - v[0]; + float2 v1 = input.position.xy - v[1]; + float2 v2 = input.position.xy - v[2]; + + float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, 
e0) / dot(e0, e0))); + float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); + float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, e2) / dot(e2, e2))); + + float line0Sdf = distanceToLine0 * triangleAreaSign * (v0.x * e0.y - v0.y * e0.x); + float line1Sdf = distanceToLine1 * triangleAreaSign * (v1.x * e1.y - v1.y * e1.x); + float line2Sdf = distanceToLine2 * triangleAreaSign * (v2.x * e2.y - v2.y * e2.x); + float heightDeriv = fwidth(height); + float line3Sdf = (minShadingHeight - height) / heightDeriv; + float line4Sdf = (height - maxShadingHeight) / heightDeriv; + + float convexPolygonSdf = max(line0Sdf, line1Sdf); + convexPolygonSdf = max(convexPolygonSdf, line2Sdf); + convexPolygonSdf = max(convexPolygonSdf, line3Sdf); + convexPolygonSdf = max(convexPolygonSdf, line4Sdf); + + localAlpha = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); + + // calculate height color DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode(); if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) @@ -510,7 +544,7 @@ float4 fragMain(PSInput input) : SV_TARGET } } - localAlpha = dtm.heightColorMapColors[mapIndex].a; + //localAlpha = dtm.heightColorMapColors[mapIndex].a; } else { @@ -546,6 +580,11 @@ float4 fragMain(PSInput input) : SV_TARGET localAlpha = lerp(lowerBoundColor.a, upperBoundColor.a, interpolationVal);; } } + //else // TODO: remove!! 
+ //{ + // printf("WTF"); + // return float4(0.0f, 0.0f, 0.0f, 1.0f); + //} // CONTOUR diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index d45eac46f..4a955d92d 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -85,7 +85,7 @@ PSInput main(uint vertexID : SV_VertexID) // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. -//#define DTM +#define DTM #ifdef DTM PSInput outV; @@ -130,6 +130,17 @@ PSInput main(uint vertexID : SV_VertexID) outV.setOutlineThickness(sdfOutlineThickness); outV.setContourLineThickness(sdfContourLineThickness); + // full screen triangle (this will destroy outline, contour line and height drawing) +#if 0 + const uint vertexIdx = vertexID % 3; + if(vertexIdx == 0) + outV.position.xy = float2(-1.0f, -1.0f); + else if (vertexIdx == 1) + outV.position.xy = float2(-1.0f, 3.0f); + else if (vertexIdx == 2) + outV.position.xy = float2(3.0f, -1.0f); +#endif + return outV; #else From e4e7f1ec8476ff7d8c3f9bf71002c40c9585b81d Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 10 Apr 2025 08:44:20 +0330 Subject: [PATCH 039/129] multiple draw calls to allow dtms and linework simultaneously. 
--- 62_CAD/CTriangleMesh.h | 9 - 62_CAD/DrawResourcesFiller.cpp | 17 +- 62_CAD/DrawResourcesFiller.h | 44 +- 62_CAD/main.cpp | 86 +- 62_CAD/shaders/globals.hlsl | 1 + 62_CAD/shaders/main_pipeline/common.hlsl | 2 + .../main_pipeline/fragment_shader.hlsl | 1941 +++++++++-------- .../shaders/main_pipeline/vertex_shader.hlsl | 951 ++++---- 8 files changed, 1554 insertions(+), 1497 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 374fae1b4..6c68cec27 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -69,13 +69,6 @@ class CTriangleMesh final using index_t = uint32_t; using vertex_t = TriangleMeshVertex; - struct DrawData - { - PushConstants pushConstants; - uint64_t indexBufferOffset; - uint64_t indexCount; - }; - inline void setVertices(core::vector&& vertices) { m_vertices = std::move(vertices); @@ -107,8 +100,6 @@ class CTriangleMesh final return m_indices.size(); } - -private: core::vector m_vertices; core::vector m_indices; }; \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c566de456..d28843a31 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -134,13 +134,18 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedS } } -void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { + flushDrawObjects(); // flushes draw call construction of any possible draw objects before dtm, because currently we're sepaerating dtm draw calls from drawObj draw calls + setActiveDTMSettings(dtmSettingsInfo); beginMainObject(MainObjectType::DTM); + DrawCallData drawCallData = {}; + drawCallData.isDTMRendering = true; + uint32_t mainObjectIdx = 
acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); - drawData.pushConstants.triangleMeshMainObjectIndex = mainObjectIdx; + drawCallData.dtm.triangleMeshMainObjectIndex = mainObjectIdx; ICPUBuffer::SCreationParams geometryBuffParams; @@ -162,18 +167,19 @@ void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, CTriangleM size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(dataToAddByteSize, alignof(CTriangleMesh::vertex_t)); void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; // the actual bda address will be determined only after all copies are finalized, later we will do += `baseBDAAddress + geometryInfo.bufferOffset` - drawData.pushConstants.triangleMeshVerticesBaseAddress = geometryBufferOffset; + drawCallData.dtm.triangleMeshVerticesBaseAddress = geometryBufferOffset; memcpy(dst, mesh.getVertices().data(), vtxBuffByteSize); geometryBufferOffset += vtxBuffByteSize; // Copy IndexBuffer dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; - drawData.indexBufferOffset = geometryBufferOffset; + drawCallData.dtm.indexBufferOffset = geometryBufferOffset; memcpy(dst, mesh.getIndices().data(), indexBuffByteSize); geometryBufferOffset += indexBuffByteSize; } - drawData.indexCount = mesh.getIndexCount(); + drawCallData.dtm.indexCount = mesh.getIndexCount(); + drawCalls.push_back(drawCallData); endMainObject(); } @@ -334,6 +340,7 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) { bool success = true; + flushDrawObjects(); success &= finalizeBufferCopies(intendedNextSubmit); success &= finalizeTextureCopies(intendedNextSubmit); return success; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 03482320e..846046a43 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -155,7 +155,7 @@ struct 
DrawResourcesFiller /// WARNING: make sure this function is called within begin/endMainObject scope void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); - void drawTriangleMesh(const CTriangleMesh& mesh, CTriangleMesh::DrawData& drawData, const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + void drawTriangleMesh(const CTriangleMesh& mesh, const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); // ! Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( @@ -207,6 +207,9 @@ struct DrawResourcesFiller resetCustomClipProjections(); resetLineStyles(); resetDTMSettings(); + + drawObjectsFlushedToDrawCalls = 0ull; + drawCalls.clear(); } /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders @@ -243,6 +246,45 @@ struct DrawResourcesFiller /// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj uint32_t getActiveMainObjectIndex() const { return activeMainObjectIndex; } + // TODO: Remove these later, these are for multiple draw calls instead of a single one. 
+ struct DrawCallData + { + union + { + struct Dtm + { + uint64_t indexBufferOffset; + uint64_t indexCount; + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; + } dtm; + struct DrawObj + { + uint64_t drawObjectStart = 0ull; + uint64_t drawObjectCount = 0ull; + } drawObj; + }; + bool isDTMRendering; + }; + + uint64_t drawObjectsFlushedToDrawCalls = 0ull; + + void flushDrawObjects() + { + if (resourcesCollection.drawObjects.getCount() > drawObjectsFlushedToDrawCalls) + { + DrawCallData drawCall = {}; + drawCall.isDTMRendering = false; + drawCall.drawObj.drawObjectStart = drawObjectsFlushedToDrawCalls; + drawCall.drawObj.drawObjectCount = resourcesCollection.drawObjects.getCount() - drawObjectsFlushedToDrawCalls; + drawCalls.push_back(drawCall); + drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); + } + } + + std::vector drawCalls; // either dtms or objects + + protected: struct MSDFTextureCopy diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index b49dc56d2..e425dce54 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -802,7 +802,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } const asset::SPushConstantRange range = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstants) }; @@ -1185,6 +1185,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) { + // TODO: Remove this check later + if (inBetweenSubmit) + { + m_logger->log("Temporarily Disabled. 
Auto-Submission shouldn't happen (for Demo)", ILogger::ELL_ERROR); + assert(!inBetweenSubmit); + } + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; @@ -1295,36 +1302,42 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - const uint32_t currentIndexCount = resources.drawObjects.getCount() * 6u; - IGPUDescriptorSet* descriptorSets[] = { descriptorSet0.get(), descriptorSet1.get() }; cb->bindDescriptorSets(asset::EPBP_GRAPHICS, pipelineLayout.get(), 0u, 2u, descriptorSets); + + cb->bindGraphicsPipeline(graphicsPipeline.get()); - if (mode == ExampleMode::CASE_9) + for (auto& drawCall : drawResourcesFiller.drawCalls) { + if (drawCall.isDTMRendering) + { + cb->bindIndexBuffer({ .offset = resources.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); - // TODO[Przemek]: based on our call bind index buffer you uploaded to part of the `drawResourcesFiller.gpuDrawBuffers.geometryBuffer` - // Vertices will be pulled based on baseBDAPointer of where you uploaded the vertex + the VertexID in the vertex shader. 
- cb->bindIndexBuffer({ .offset = resources.geometryInfo.bufferOffset + m_triangleMeshDrawData.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); + PushConstants pc = { + .triangleMeshVerticesBaseAddress = drawCall.dtm.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resources.geometryInfo.bufferOffset, + .triangleMeshMainObjectIndex = drawCall.dtm.triangleMeshMainObjectIndex, + .isDTMRendering = true + }; + cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); - // TODO[Przemek]: binding the same pipelie, no need to change. - cb->bindGraphicsPipeline(graphicsPipeline.get()); + cb->drawIndexed(drawCall.dtm.indexCount, 1u, 0u, 0u, 0u); + } + else + { + PushConstants pc = { + .isDTMRendering = false + }; + cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); - // TODO[Przemek]: contour settings, height shading settings, base bda pointers will need to be pushed via pushConstants before the draw currently as it's the easiest thing to do. 
- m_triangleMeshDrawData.pushConstants.triangleMeshVerticesBaseAddress += resourcesGPUBuffer->getDeviceAddress() + resources.geometryInfo.bufferOffset; - cb->pushConstants(graphicsPipeline->getLayout(), IGPUShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(PushConstants), &m_triangleMeshDrawData.pushConstants); + const uint64_t indexOffset = drawCall.drawObj.drawObjectStart * 6u; + const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; - // TODO[Przemek]: draw parameters needs to reflect the mesh involved - cb->drawIndexed(m_triangleMeshDrawData.indexCount, 1u, 0u, 0u, 0u); - } - else - { - assert(currentIndexCount == resources.indexBuffer.getCount()); - cb->bindIndexBuffer({ .offset = resources.indexBuffer.bufferOffset, .buffer = resourcesGPUBuffer.get() }, asset::EIT_32BIT); - cb->bindGraphicsPipeline(graphicsPipeline.get()); - cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); + // assert(currentIndexCount == resources.indexBuffer.getCount()); + cb->bindIndexBuffer({ .offset = resources.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); + } } - + if (fragmentShaderInterlockEnabled) { cb->bindGraphicsPipeline(resolveAlphaGraphicsPipeline.get()); @@ -1333,10 +1346,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if constexpr (DebugModeWireframe) { + const uint32_t indexCount = resources.drawObjects.getCount() * 6u; cb->bindGraphicsPipeline(debugGraphicsPipeline.get()); - cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); + cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } - + cb->endRenderPass(); if (!inBetweenSubmit) @@ -3191,11 +3205,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // PYRAMID core::vector vertices = { - { float32_t2(0.0, 0.0), 100.0 }, - { float32_t2(-200.0, -200.0), 10.0 }, - { float32_t2(200.0, -200.0), 10.0 }, - { float32_t2(200.0, 200.0), -20.0 }, - { 
float32_t2(-200.0, 200.0), 10.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(-200.0, -200.0), 10.0 }, + { float64_t2(200.0, -200.0), 10.0 }, + { float64_t2(200.0, 200.0), -20.0 }, + { float64_t2(-200.0, 200.0), 10.0 }, }; core::vector indices = { @@ -3277,7 +3291,17 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - drawResourcesFiller.drawTriangleMesh(mesh, m_triangleMeshDrawData, dtmSettingsInfo, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, dtmSettingsInfo, intendedNextSubmit); + + dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); + dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); + for (auto& v : mesh.m_vertices) + { + v.pos += float64_t2(400.0, 200.0); + v.height -= 10.0; + } + + drawResourcesFiller.drawTriangleMesh(mesh, dtmSettingsInfo, intendedNextSubmit); } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); @@ -3360,8 +3384,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu #endif std::unique_ptr m_geoTextureRenderer; - - CTriangleMesh::DrawData m_triangleMeshDrawData; }; NBL_MAIN_FUNC(ComputerAidedDesign) diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 319c30b3d..24a833334 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -35,6 +35,7 @@ struct PushConstants { uint64_t triangleMeshVerticesBaseAddress; uint32_t triangleMeshMainObjectIndex; + uint32_t isDTMRendering; }; // TODO: Compute this in a compute shader from the world counterparts diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 0cf4e3bce..4327cf7fe 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -236,6 +236,8 @@ struct PSInput // [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl +[[vk::push_constant]] PushConstants pc; + 
[[vk::combinedImageSampler]][[vk::binding(1, 0)]] Texture2DArray msdfTextures : register(t4); [[vk::combinedImageSampler]][[vk::binding(1, 0)]] SamplerState msdfSampler : register(s4); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 5d5d464cc..ab6388bc8 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -1,969 +1,972 @@ -#define FRAGMENT_SHADER_INPUT -#include "common.hlsl" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -template -struct DefaultClipper -{ - using float_t2 = vector; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; - - static DefaultClipper construct() - { - DefaultClipper ret; - return ret; - } - - inline float_t2 operator()(const float_t t) - { - const float_t ret = clamp(t, 0.0, 1.0); - return float_t2(ret, ret); - } -}; - -// for usage in upper_bound function -struct StyleAccessor -{ - LineStyle style; - using value_type = float; - - float operator[](const uint32_t ix) - { - return style.getStippleValue(ix); - } -}; - -template -struct StyleClipper -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; - - static StyleClipper construct( - LineStyle style, - CurveType curve, - typename CurveType::ArcLengthCalculator arcLenCalc, - float phaseShift, - float stretch, - float worldToScreenRatio) - { - StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; - - // values for non-uniform stretching with a rigid segment - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - // rigidSegment info in old non stretched pattern - ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? 
style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; - ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? style.getStippleValue(style.rigidSegmentIdx) : 1.0f; - ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; - // stretch value for non rigid segments - ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); - // rigidSegment info to new stretched pattern - ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start - ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len - ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end - } - else - { - ret.nonRigidSegmentStretchValue = stretch; - } - - return ret; - } - - // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes - // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern - float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) - { - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment - ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment - ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment - ret *= stretch; - return ret; - } - else - { - return normalizedPlaceInPattern; - } - } - - float_t2 operator()(float_t t) - { - // basicaly 0.0 and 1.0 but with a guardband to discard outside the range - const float_t minT = 0.0 - 1.0; - const float_t maxT = 1.0 + 1.0; - - StyleAccessor styleAccessor = { style }; - const float_t 
reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; - const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * style.reciprocalStipplePatternLen); - - const float_t arcLen = arcLenCalc.calcArcLen(t); - const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); - float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); - uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); - - const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; - float_t2 ret = float_t2(InvalidT, InvalidT); - - // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections - const bool notInDrawSection = patternIdx & 0x1; - - // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections - float_t minDrawT = 0.0; - float_t maxDrawT = 1.0; - { - float_t normalizedPlaceInPatternBegin = frac(phaseShift); - normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); - uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); - const bool BeginInNonDrawSection = patternIdxBegin & 0x1; - - if (BeginInNonDrawSection) - { - float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; - diffToRightDrawableSection -= normalizedPlaceInPatternBegin; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? 
nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; - minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); - } - - // Completely in non-draw section -> clip away: - if (minDrawT >= 1.0) - return ret; - - const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); - const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); - float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); - uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); - const bool EndInNonDrawSection = patternIdxEnd & 0x1; - - if (EndInNonDrawSection) - { - float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; - maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); - } - } - - if (notInDrawSection) - { - float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - - float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; - const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; - float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); - t0 = clamp(t0, minDrawT, maxDrawT); - - float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; - diffToRightDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; - const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; - float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); - t1 = clamp(t1, minDrawT, maxDrawT); - - ret = float_t2(t0, t1); - } - else - { - t = clamp(t, minDrawT, maxDrawT); - ret = float_t2(t, t); - } - - return ret; - } - - LineStyle style; - CurveType curve; - typename CurveType::ArcLengthCalculator arcLenCalc; - float phaseShift; - float stretch; - float worldToScreenRatio; - // precomp value for non uniform stretching - float rigidSegmentStart; - float rigidSegmentEnd; - float rigidSegmentLen; - float nonRigidSegmentStretchValue; -}; - -template > -struct ClippedSignedDistance -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - - const static float_t sdf(CurveType curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) - { - typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); - - const float_t InvalidT = nbl::hlsl::numeric_limits::max; - // TODO: Fix and test, we're not working with squared distance anymore - const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? 
- - bool clipped = false; - float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; - float_t closestT = InvalidT; - [[unroll(CurveType::MaxCandidates)]] - for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) - { - const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); - if (candidateDistanceSquared < closestDistanceSquared) - { - float_t2 snappedTs = clipper(candidates[i]); - - if (snappedTs[0] == InvalidT) - { - continue; - } - - if (snappedTs[0] != candidates[i]) - { - // left snapped or clamped - const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); - if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[0]; - closestDistanceSquared = leftSnappedCandidateDistanceSquared; - } - - if (snappedTs[0] != snappedTs[1]) - { - // right snapped or clamped - const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); - if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[1]; - closestDistanceSquared = rightSnappedCandidateDistanceSquared; - } - } - } - else - { - // no snapping - if (candidateDistanceSquared < closestDistanceSquared) - { - clipped = false; - closestT = candidates[i]; - closestDistanceSquared = candidateDistanceSquared; - } - } - } - } - - - float_t roundedDistance = closestDistanceSquared - thickness; - if(!isRoadStyle) - { - return roundedDistance; - } - else - { - const float_t aaWidth = globals.antiAliasingFactor; - float_t rectCappedDistance = roundedDistance; - - if (clipped) - { - float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); - rectCappedDistance = capSquare(q, thickness, aaWidth); - } - - return rectCappedDistance; - } - } - - static float capSquare(float_t2 q, float_t th, float_t aaWidth) - { - float_t2 d = abs(q) - float_t2(aaWidth, th); - return length(max(d, 0.0)) + 
min(max(d.x, d.y), 0.0); - } -}; - -// sdf of Isosceles Trapezoid y-aligned by https://iquilezles.org/articles/distfunctions2d/ -float sdTrapezoid(float2 p, float r1, float r2, float he) -{ - float2 k1 = float2(r2, he); - float2 k2 = float2(r2 - r1, 2.0 * he); - - p.x = abs(p.x); - float2 ca = float2(max(0.0, p.x - ((p.y < 0.0) ? r1 : r2)), abs(p.y) - he); - float2 cb = p - k1 + k2 * clamp(dot(k1 - p, k2) / dot(k2,k2), 0.0, 1.0); - - float s = (cb.x < 0.0 && ca.y < 0.0) ? -1.0 : 1.0; - - return s * sqrt(min(dot(ca,ca), dot(cb,cb))); -} - -// line segment sdf which returns the distance vector specialized for usage in hatch box line boundaries -float2 sdLineDstVec(float2 P, float2 A, float2 B) -{ - const float2 PA = P - A; - const float2 BA = B - A; - float h = clamp(dot(PA, BA) / dot(BA, BA), 0.0, 1.0); - return PA - BA * h; -} - -float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb) -{ - float h = length(b - a) / 2.0; - float2 d = normalize(b - a); - float2x2 rot = float2x2(d.y, -d.x, d.x, d.y); - p = mul(rot, p); - p.y -= h - thickness; - return sdTrapezoid(p, ra, rb, h); -} - -typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; -typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; - -// for usage in upper_bound function -struct DTMSettingsHeightsAccessor -{ - DTMSettings dtmSettings; - using value_type = float; - - float operator[](const uint32_t ix) - { - return dtmSettings.heightColorMapHeights[ix]; - } -}; - -// We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm -// because there is no `if constexpr` in hlsl -// @params -// textureColor: color sampled from a texture -// useStyleColor: instead of writing and reading from colorStorage, use main object Idx to find the style color for the object. 
-template -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 textureColor, bool colorFromTexture); - -template<> -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) -{ - uint32_t styleIdx = loadMainObject(currentMainObjectIdx).styleIdx; - if (!colorFromTexture) - { - float32_t4 col = loadLineStyle(styleIdx).color; - col.w *= localAlpha; - return float4(col); - } - else - return float4(localTextureColor, localAlpha); -} -template<> -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) -{ - float32_t4 color; - nbl::hlsl::spirv::beginInvocationInterlockEXT(); - - const uint32_t packedData = pseudoStencil[fragCoord]; - - const uint32_t localQuantizedAlpha = (uint32_t)(localAlpha * 255.f); - const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits); - const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits); - // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate - const bool differentMainObject = currentMainObjectIdx != storedMainObjectIdx; // meaning current pixel's main object is different than what is already stored - const bool resolve = differentMainObject && storedMainObjectIdx != InvalidMainObjectIdx; - uint32_t toResolveStyleIdx = InvalidStyleIdx; - - // load from colorStorage only if we want to resolve color from texture instead of style - // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store - if (resolve) - { - toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx; - if (toResolveStyleIdx == InvalidStyleIdx) // if style idx 
to resolve is invalid, then it means we should resolve from color - color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); - } - - // If current localAlpha is higher than what is already stored in pseudoStencil we will update the value in pseudoStencil or the color in colorStorage, this is equivalent to programmable blending MAX operation. - // OR If previous pixel has a different ID than current's (i.e. previous either empty/invalid or a differnet mainObject), we should update our alpha and color storages. - if (differentMainObject || localQuantizedAlpha > storedQuantizedAlpha) - { - pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(localQuantizedAlpha,currentMainObjectIdx,AlphaBits,MainObjectIdxBits); - if (colorFromTexture) // writing color from texture - colorStorage[fragCoord] = packR11G11B10_UNORM(localTextureColor); - } - - nbl::hlsl::spirv::endInvocationInterlockEXT(); - - if (!resolve) - discard; - - // draw with previous geometry's style's color or stored in texture buffer :kek: - // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj - if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style - color = loadLineStyle(toResolveStyleIdx).color; - color.a *= float(storedQuantizedAlpha) / 255.f; - - return color; -} - -float dot2(in float2 vec) -{ - return dot(vec, vec); -} - -[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] -[shader("pixel")] -float4 fragMain(PSInput input) : SV_TARGET -{ - float localAlpha = 0.0f; - float3 textureColor = float3(0, 0, 0); // color sampled from a texture - - ObjectType objType = input.getObjType(); - const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); - const MainObject mainObj = loadMainObject(currentMainObjectIdx); - -#define DTM -#ifdef DTM - // TRIANGLE RENDERING - { - const float outlineThickness = 
input.getOutlineThickness(); - const float contourThickness = input.getContourLineThickness(); - const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float stretch = 1.0f; // TODO: figure out what is it for ---> [ERFAN's REPLY: no need to give shit about this in dtms, it's for special shape styles] - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - - DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); - LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); - LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); - - float3 v[3]; - v[0] = input.getScreenSpaceVertexAttribs(0); - v[1] = input.getScreenSpaceVertexAttribs(1); - v[2] = input.getScreenSpaceVertexAttribs(2); - - const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; - - // indices of points constructing every edge - uint2 edgePoints[3]; - edgePoints[0] = uint2(0, 1); - edgePoints[1] = uint2(1, 2); - edgePoints[2] = uint2(2, 0); - - // index of vertex opposing an edge, needed for calculation of triangle heights - uint opposingVertexIdx[3]; - opposingVertexIdx[0] = 2; - opposingVertexIdx[1] = 0; - opposingVertexIdx[2] = 1; - - float height = input.getHeight(); - - // HEIGHT SHADING - const uint32_t heightMapSize = dtm.heightColorEntryCount; - float minShadingHeight = dtm.heightColorMapHeights[0]; - float maxShadingHeight = dtm.heightColorMapHeights[heightMapSize - 1]; - - if (heightMapSize > 0) - { - // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez - float2 e0 = v[1] - v[0]; - float2 e1 = v[2] - v[1]; - float2 e2 = v[0] - v[2]; - - float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); - float2 v0 = input.position.xy - v[0]; - float2 v1 = input.position.xy - v[1]; - float2 v2 = input.position.xy - v[2]; - - float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0))); - float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); - float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, 
e2) / dot(e2, e2))); - - float line0Sdf = distanceToLine0 * triangleAreaSign * (v0.x * e0.y - v0.y * e0.x); - float line1Sdf = distanceToLine1 * triangleAreaSign * (v1.x * e1.y - v1.y * e1.x); - float line2Sdf = distanceToLine2 * triangleAreaSign * (v2.x * e2.y - v2.y * e2.x); - float heightDeriv = fwidth(height); - float line3Sdf = (minShadingHeight - height) / heightDeriv; - float line4Sdf = (height - maxShadingHeight) / heightDeriv; - - float convexPolygonSdf = max(line0Sdf, line1Sdf); - convexPolygonSdf = max(convexPolygonSdf, line2Sdf); - convexPolygonSdf = max(convexPolygonSdf, line3Sdf); - convexPolygonSdf = max(convexPolygonSdf, line4Sdf); - - localAlpha = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); - - // calculate height color - DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode(); - - if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) - { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; - uint32_t mapIndexPlus1 = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t mapIndex = mapIndexPlus1 == 0 ? 
mapIndexPlus1 : mapIndexPlus1 - 1; - - float heightDeriv = fwidth(height); - bool blendWithPrev = true - && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[mapIndexPlus1] + dtm.heightColorMapHeights[mapIndex])); - - // logic explainer: if colorIdx is 0.0 then it means blend with next - // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true - // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values - if (blendWithPrev) - { - if (mapIndex > 0) - { - float pxDistanceToPrevHeight = (height - dtm.heightColorMapHeights[mapIndex]) / heightDeriv; - float prevColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToPrevHeight); - textureColor = lerp(dtm.heightColorMapColors[mapIndex - 1].rgb, dtm.heightColorMapColors[mapIndex].rgb, prevColorCoverage); - } - else - { - textureColor = dtm.heightColorMapColors[mapIndex].rgb; - } - } - else - { - if (mapIndex < heightMapSize - 1) - { - float pxDistanceToNextHeight = (height - dtm.heightColorMapHeights[mapIndexPlus1]) / heightDeriv; - float nextColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNextHeight); - textureColor = lerp(dtm.heightColorMapColors[mapIndex].rgb, dtm.heightColorMapColors[mapIndexPlus1].rgb, nextColorCoverage); - } - else - { - textureColor = dtm.heightColorMapColors[mapIndex].rgb; - } - } - - //localAlpha = dtm.heightColorMapColors[mapIndex].a; - } - else - { - float heightTmp; - if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) - { - float interval = dtm.intervalWidth; - int sectionIndex = int((height - minShadingHeight) / interval); - heightTmp = minShadingHeight + float(sectionIndex) * interval; - } - else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) - { - heightTmp = height; - } - - DTMSettingsHeightsAccessor 
dtmHeightsAccessor = { dtm }; - uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1; - - float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; - - float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; - - float interpolationVal; - if (upperBoundHeightIndex == 0) - interpolationVal = 1.0f; - else - interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - - textureColor = lerp(lowerBoundColor.rgb, upperBoundColor.rgb, interpolationVal); - localAlpha = lerp(lowerBoundColor.a, upperBoundColor.a, interpolationVal);; - } - } - //else // TODO: remove!! - //{ - // printf("WTF"); - // return float4(0.0f, 0.0f, 0.0f, 1.0f); - //} - - // CONTOUR - - // TODO: move to ubo or push constants - const float startHeight = dtm.contourLinesStartHeight; - const float endHeight = dtm.contourLinesEndHeight; - const float interval = dtm.contourLinesHeightInterval; - - // TODO: can be precomputed - const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; - - // TODO: it actually can output a negative number, fix - int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight + (interval * 0.5f)) / interval); - contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); - float contourLineHeight = startHeight + interval * contourLineIdx; - - int contourLinePointsIdx = 0; - float2 contourLinePoints[2]; - // TODO: case where heights we are looking for are on all three vertices - for (int i = 0; i < 3; ++i) - { - if (contourLinePointsIdx == 3) - break; - - const uint2 currentEdgePoints = edgePoints[i]; - float3 p0 = v[currentEdgePoints[0]]; - float3 p1 = v[currentEdgePoints[1]]; - - 
if (p1.z < p0.z) - nbl::hlsl::swap(p0, p1); - - float minHeight = p0.z; - float maxHeight = p1.z; - - if (height >= minHeight && height <= maxHeight) - { - float2 edge = float2(p1.x, p1.y) - float2(p0.x, p0.y); - float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight); - - contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y); - ++contourLinePointsIdx; - } - } - - { - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); - - float distance = nbl::hlsl::numeric_limits::max; - if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag); - } - else - { - // TODO: - // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! - // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper); - } - - float contourLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a; - textureColor = lerp(textureColor, contourStyle.color.rgb, contourLocalAlpha); - localAlpha = max(localAlpha, contourLocalAlpha); - } - - - - // OUTLINE - - // find sdf of every edge - float triangleAreaTimesTwo; - { - float3 AB = v[0] - v[1]; - float3 AC = v[0] - v[2]; - AB.z = 0.0f; - AC.z = 0.0f; - - // TODO: figure out if there is a faster solution - 
triangleAreaTimesTwo = length(cross(AB, AC)); - } - - // calculate sdf of every edge as it wasn't stippled - float distances[3]; - for (int i = 0; i < 3; ++i) - { - const uint2 currentEdgePoints = edgePoints[i]; - float3 A = v[currentEdgePoints[0]]; - float3 B = v[currentEdgePoints[1]]; - float3 AB = B - A; - float ABLen = length(AB); - - distances[i] = (triangleAreaTimesTwo / ABLen) * baryCoord[opposingVertexIdx[i]]; - } - - float minDistance = nbl::hlsl::numeric_limits::max; - if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) - { - for (uint i = 0; i < 3; ++i) - distances[i] -= outlineThickness; - - minDistance = min(distances[0], min(distances[1], distances[2])); - } - else - { - for (int i = 0; i < 3; ++i) - { - if (distances[i] > outlineThickness) - continue; - - const uint2 currentEdgePoints = edgePoints[i]; - float3 p0 = v[currentEdgePoints[0]]; - float3 p1 = v[currentEdgePoints[1]]; - - // long story short, in order for stipple patterns to be consistent: - // - point with lesser x coord should be starting point - // - if x coord of both points are equal then point with lesser y value should be starting point - if (p1.x < p0.x) - nbl::hlsl::swap(p0, p1); - else if (p1.x == p0.x && p1.y < p0.y) - nbl::hlsl::swap(p0, p1); - - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); - - float distance = nbl::hlsl::numeric_limits::max; - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); - - minDistance = min(minDistance, distance); - } - - } - - float outlineLocalAlpha = smoothstep(+globals.antiAliasingFactor, 
-globals.antiAliasingFactor, minDistance) * outlineStyle.color.a; - textureColor = lerp(textureColor, outlineStyle.color.rgb, outlineLocalAlpha); - localAlpha = max(localAlpha, outlineLocalAlpha); - } - - return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); -#endif - // figure out local alpha with sdf - if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) - { - float distance = nbl::hlsl::numeric_limits::max; - if (objType == ObjectType::LINE) - { - const float2 start = input.getLineStart(); - const float2 end = input.getLineEnd(); - const uint32_t styleIdx = mainObj.styleIdx; - const float thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); - - LineStyle style = loadLineStyle(styleIdx); - - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); - } - else - { - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::QUAD_BEZIER) - { - nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic(); - nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator(); - - const uint32_t styleIdx = mainObj.styleIdx; - const float 
thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - - LineStyle style = loadLineStyle(styleIdx); - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); - } - else - { - BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::POLYLINE_CONNECTOR) - { - const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter(); - distance = miterSDF( - P, - input.getLineThickness(), - input.getPolylineConnectorTrapezoidStart(), - input.getPolylineConnectorTrapezoidEnd(), - input.getPolylineConnectorTrapezoidLongBase(), - input.getPolylineConnectorTrapezoidShortBase()); - - } - localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); - } - else if (objType == ObjectType::CURVE_BOX) - { - const float minorBBoxUV = input.getMinorBBoxUV(); - const float majorBBoxUV = input.getMajorBBoxUV(); - - nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor(); - nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor(); - nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor(); - nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor(); - - // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel. 
- nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - - const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0); - const float minEv = curveMinMinor.evaluate(minT); - - const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0); - const float maxEv = curveMaxMinor.evaluate(maxT); - - const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0; - const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv; - - if (insideMinor && insideMajor) - { - localAlpha = 1.0; - } - else - { - // Find the true SDF of a hatch box boundary which is bounded by two curves, It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction) - // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor - const float InvalidT = nbl::hlsl::numeric_limits::max; - const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max; - - const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize(); - - - float closestDistanceSquared = MAX_DISTANCE_SQUARED; - const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize; - - if (minorBBoxUV < minEv) - { - // DO SDF of Min Curve - nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize, - float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize, - float2(curveMinMinor.c, curveMinMajor.c) * boxScreenSpaceSize); - - nbl::hlsl::shapes::Quadratic::Candidates candidates 
= minCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) - { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = minCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; - } - } - else if (minorBBoxUV > maxEv) - { - // Do SDF of Max Curve - nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize, - float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize, - float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize); - nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) - { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = maxCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; - } - } - - if (!insideMajor) - { - const bool minLessThanMax = minEv < maxEv; - float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED); - if (majorBBoxUV > 1.0) - { - const float2 minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize); - else - majorDistVector = pos - minCurveEnd; - } - else - { - const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize); - else - majorDistVector 
= pos - minCurveStart; - } - - const float majorDistSq = dot(majorDistVector, majorDistVector); - if (majorDistSq < closestDistanceSquared) - closestDistanceSquared = majorDistSq; - } - - const float dist = sqrt(closestDistanceSquared); - localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist); - } - - LineStyle style = loadLineStyle(mainObj.styleIdx); - uint32_t textureId = asuint(style.screenSpaceLineWidth); - if (textureId != InvalidTextureIdx) - { - // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected - // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); - localAlpha *= smoothstep(+globals.antiAliasingFactor / 2.0, -globals.antiAliasingFactor / 2.0f, msdf); - } - } - else if (objType == ObjectType::FONT_GLYPH) - { - const float2 uv = input.getFontGlyphUV(); - const uint32_t textureId = input.getFontGlyphTextureId(); - - if (textureId != InvalidTextureIdx) - { - float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange()); - /* - explaining "*= exp2(max(mipLevel,0.0))" - Each mip level has constant MSDFPixelRange - Which essentially makes the msdfSamples here (Harware Sampled) have different scales per mip - As we go up 1 mip level, the msdf distance should be multiplied by 2.0 - While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled. 
- It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice! - - Alternatively you can think of it as doing this instead: - localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf); - Which is reducing the aa feathering as we go up the mip levels. - to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph - */ - msdf *= exp2(max(mipLevel,0.0)); - - LineStyle style = loadLineStyle(mainObj.styleIdx); - const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; - const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle - localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf); - } - } - else if (objType == ObjectType::IMAGE) - { - const float2 uv = input.getImageUV(); - const uint32_t textureId = input.getImageTextureId(); - - if (textureId != InvalidTextureIdx) - { - float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); - textureColor = colorSample.rgb; - localAlpha = colorSample.a; - } - } - - uint2 fragCoord = uint2(input.position.xy); - - if (localAlpha <= 0) - discard; - - const bool colorFromTexture = objType == ObjectType::IMAGE; - - // TODO[Przemek]: But make sure you're still calling this, correctly calculating alpha and texture color. - // you can add 1 main object and push via DrawResourcesFiller like we already do for other objects (this go in the mainObjects StorageBuffer) and then set the currentMainObjectIdx to 0 here - // having 1 main object temporarily means that all triangle meshes will be treated as a unified object in blending operations. 
- return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); -} +#define FRAGMENT_SHADER_INPUT +#include "common.hlsl" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template +struct DefaultClipper +{ + using float_t2 = vector; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; + + static DefaultClipper construct() + { + DefaultClipper ret; + return ret; + } + + inline float_t2 operator()(const float_t t) + { + const float_t ret = clamp(t, 0.0, 1.0); + return float_t2(ret, ret); + } +}; + +// for usage in upper_bound function +struct StyleAccessor +{ + LineStyle style; + using value_type = float; + + float operator[](const uint32_t ix) + { + return style.getStippleValue(ix); + } +}; + +template +struct StyleClipper +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; + + static StyleClipper construct( + LineStyle style, + CurveType curve, + typename CurveType::ArcLengthCalculator arcLenCalc, + float phaseShift, + float stretch, + float worldToScreenRatio) + { + StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; + + // values for non-uniform stretching with a rigid segment + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + // rigidSegment info in old non stretched pattern + ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; + ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? 
style.getStippleValue(style.rigidSegmentIdx) : 1.0f; + ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; + // stretch value for non rigid segments + ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); + // rigidSegment info to new stretched pattern + ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start + ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len + ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end + } + else + { + ret.nonRigidSegmentStretchValue = stretch; + } + + return ret; + } + + // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes + // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern + float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) + { + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment + ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment + ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment + ret *= stretch; + return ret; + } + else + { + return normalizedPlaceInPattern; + } + } + + float_t2 operator()(float_t t) + { + // basicaly 0.0 and 1.0 but with a guardband to discard outside the range + const float_t minT = 0.0 - 1.0; + const float_t maxT = 1.0 + 1.0; + + StyleAccessor styleAccessor = { style }; + const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; + const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * 
style.reciprocalStipplePatternLen); + + const float_t arcLen = arcLenCalc.calcArcLen(t); + const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); + float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); + uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); + + const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; + float_t2 ret = float_t2(InvalidT, InvalidT); + + // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections + const bool notInDrawSection = patternIdx & 0x1; + + // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections + float_t minDrawT = 0.0; + float_t maxDrawT = 1.0; + { + float_t normalizedPlaceInPatternBegin = frac(phaseShift); + normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); + uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); + const bool BeginInNonDrawSection = patternIdxBegin & 0x1; + + if (BeginInNonDrawSection) + { + float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; + diffToRightDrawableSection -= normalizedPlaceInPatternBegin; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? 
nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; + minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); + } + + // Completely in non-draw section -> clip away: + if (minDrawT >= 1.0) + return ret; + + const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); + const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); + float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); + uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); + const bool EndInNonDrawSection = patternIdxEnd & 0x1; + + if (EndInNonDrawSection) + { + float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; + maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); + } + } + + if (notInDrawSection) + { + float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + + float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; + const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; + float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); + t0 = clamp(t0, minDrawT, maxDrawT); + + float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; + diffToRightDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; + const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; + float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); + t1 = clamp(t1, minDrawT, maxDrawT); + + ret = float_t2(t0, t1); + } + else + { + t = clamp(t, minDrawT, maxDrawT); + ret = float_t2(t, t); + } + + return ret; + } + + LineStyle style; + CurveType curve; + typename CurveType::ArcLengthCalculator arcLenCalc; + float phaseShift; + float stretch; + float worldToScreenRatio; + // precomp value for non uniform stretching + float rigidSegmentStart; + float rigidSegmentEnd; + float rigidSegmentLen; + float nonRigidSegmentStretchValue; +}; + +template > +struct ClippedSignedDistance +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + + const static float_t sdf(CurveType curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) + { + typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); + + const float_t InvalidT = nbl::hlsl::numeric_limits::max; + // TODO: Fix and test, we're not working with squared distance anymore + const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? 
+ + bool clipped = false; + float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; + float_t closestT = InvalidT; + [[unroll(CurveType::MaxCandidates)]] + for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) + { + const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); + if (candidateDistanceSquared < closestDistanceSquared) + { + float_t2 snappedTs = clipper(candidates[i]); + + if (snappedTs[0] == InvalidT) + { + continue; + } + + if (snappedTs[0] != candidates[i]) + { + // left snapped or clamped + const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); + if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[0]; + closestDistanceSquared = leftSnappedCandidateDistanceSquared; + } + + if (snappedTs[0] != snappedTs[1]) + { + // right snapped or clamped + const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); + if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[1]; + closestDistanceSquared = rightSnappedCandidateDistanceSquared; + } + } + } + else + { + // no snapping + if (candidateDistanceSquared < closestDistanceSquared) + { + clipped = false; + closestT = candidates[i]; + closestDistanceSquared = candidateDistanceSquared; + } + } + } + } + + + float_t roundedDistance = closestDistanceSquared - thickness; + if(!isRoadStyle) + { + return roundedDistance; + } + else + { + const float_t aaWidth = globals.antiAliasingFactor; + float_t rectCappedDistance = roundedDistance; + + if (clipped) + { + float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); + rectCappedDistance = capSquare(q, thickness, aaWidth); + } + + return rectCappedDistance; + } + } + + static float capSquare(float_t2 q, float_t th, float_t aaWidth) + { + float_t2 d = abs(q) - float_t2(aaWidth, th); + return length(max(d, 0.0)) + 
min(max(d.x, d.y), 0.0); + } +}; + +// sdf of Isosceles Trapezoid y-aligned by https://iquilezles.org/articles/distfunctions2d/ +float sdTrapezoid(float2 p, float r1, float r2, float he) +{ + float2 k1 = float2(r2, he); + float2 k2 = float2(r2 - r1, 2.0 * he); + + p.x = abs(p.x); + float2 ca = float2(max(0.0, p.x - ((p.y < 0.0) ? r1 : r2)), abs(p.y) - he); + float2 cb = p - k1 + k2 * clamp(dot(k1 - p, k2) / dot(k2,k2), 0.0, 1.0); + + float s = (cb.x < 0.0 && ca.y < 0.0) ? -1.0 : 1.0; + + return s * sqrt(min(dot(ca,ca), dot(cb,cb))); +} + +// line segment sdf which returns the distance vector specialized for usage in hatch box line boundaries +float2 sdLineDstVec(float2 P, float2 A, float2 B) +{ + const float2 PA = P - A; + const float2 BA = B - A; + float h = clamp(dot(PA, BA) / dot(BA, BA), 0.0, 1.0); + return PA - BA * h; +} + +float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb) +{ + float h = length(b - a) / 2.0; + float2 d = normalize(b - a); + float2x2 rot = float2x2(d.y, -d.x, d.x, d.y); + p = mul(rot, p); + p.y -= h - thickness; + return sdTrapezoid(p, ra, rb, h); +} + +typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; +typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; + +// for usage in upper_bound function +struct DTMSettingsHeightsAccessor +{ + DTMSettings dtmSettings; + using value_type = float; + + float operator[](const uint32_t ix) + { + return dtmSettings.heightColorMapHeights[ix]; + } +}; + +// We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm +// because there is no `if constexpr` in hlsl +// @params +// textureColor: color sampled from a texture +// useStyleColor: instead of writing and reading from colorStorage, use main object Idx to find the style color for the object. 
+template
+float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 textureColor, bool colorFromTexture);
+// NOTE(review): the template parameter/argument lists of calculateFinalColor look like they were lost from this text
+// (presumably a bool "pixel interlock available" parameter with <false>/<true> specializations) -- confirm against the real file.
+
+// Variant that does not touch pseudoStencil/colorStorage and uses no interlock:
+// returns either the main object's line-style color with its alpha scaled by localAlpha,
+// or (localTextureColor, localAlpha) when colorFromTexture is set.
+template<>
+float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture)
+{
+    uint32_t styleIdx = loadMainObject(currentMainObjectIdx).styleIdx;
+    if (!colorFromTexture)
+    {
+        float32_t4 col = loadLineStyle(styleIdx).color;
+        col.w *= localAlpha;
+        return float4(col);
+    }
+    else
+        return float4(localTextureColor, localAlpha);
+}
+
+// Variant that resolves transparency through pseudoStencil/colorStorage inside an invocation-interlock critical section.
+// pseudoStencil packs (quantized alpha, main object index) per pixel. The fragment either accumulates (MAX of alpha) for
+// the same main object, or, when the stored main object differs and is valid, returns ("resolves") the previously stored
+// object's color and alpha. Fragments that have nothing to resolve are discarded.
+template<>
+float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture)
+{
+    float32_t4 color;
+    nbl::hlsl::spirv::beginInvocationInterlockEXT();
+
+    const uint32_t packedData = pseudoStencil[fragCoord];
+
+    const uint32_t localQuantizedAlpha = (uint32_t)(localAlpha * 255.f);
+    const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits);
+    const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits);
+    // if the main object has changed, we resolve the SDF alpha (draw using blend), else accumulate
+    const bool differentMainObject = currentMainObjectIdx != storedMainObjectIdx; // current pixel's main object differs from what is already stored
+    const bool resolve = differentMainObject && storedMainObjectIdx != InvalidMainObjectIdx;
+    uint32_t toResolveStyleIdx = InvalidStyleIdx;
+
+    // load from colorStorage only if we want to resolve color from texture instead of style
+    // reading colorStorage has to happen in the critical section (another fragment may store into it concurrently) and before our own store
+    if (resolve)
+    {
+        toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx;
+        if (toResolveStyleIdx == InvalidStyleIdx) // an invalid style index means the color to resolve lives in colorStorage
+            color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f);
+    }
+
+    // If the current localAlpha is higher than what is stored in pseudoStencil we update pseudoStencil (and colorStorage), which is equivalent to a programmable-blending MAX operation.
+    // OR if the stored pixel has a different ID than the current one (i.e. either empty/invalid or a different main object), we overwrite the alpha and color storages.
+    if (differentMainObject || localQuantizedAlpha > storedQuantizedAlpha)
+    {
+        pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(localQuantizedAlpha,currentMainObjectIdx,AlphaBits,MainObjectIdxBits);
+        if (colorFromTexture) // writing color from texture
+            colorStorage[fragCoord] = packR11G11B10_UNORM(localTextureColor);
+    }
+
+    nbl::hlsl::spirv::endInvocationInterlockEXT();
+
+    // `color` is only read past this point, i.e. only when it has been (or is about to be) assigned
+    if (!resolve)
+        discard;
+
+    // draw with the previous geometry's style color, or with the color that was stored in colorStorage
+    // the style color can be loaded outside the critical section because the style index was already retrieved from the stored main object
+    if (toResolveStyleIdx != InvalidStyleIdx) // a valid toResolveStyleIdx means the resolved color comes from the line style
+        color = loadLineStyle(toResolveStyleIdx).color;
+    color.a *= float(storedQuantizedAlpha) / 255.f;
+
+    return color;
+}
+
+// squared length of a 2D vector
+float dot2(in float2 vec)
+{
+    return dot(vec, vec);
+}
+
+// Pixel shader entry point.
+// Computes a coverage value (localAlpha) and optionally a color (textureColor) for the current fragment and hands them
+// to calculateFinalColor. Two paths, selected by the push constant pc.isDTMRendering:
+//  - DTM triangle path: height shading, one contour line and the triangle outline, composited in that order.
+//  - regular path: SDF based coverage per object type (LINE, QUAD_BEZIER, POLYLINE_CONNECTOR, CURVE_BOX, FONT_GLYPH, IMAGE).
+[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]]
+[shader("pixel")]
+float4 fragMain(PSInput input) : SV_TARGET
+{
+    float localAlpha = 0.0f;
+    float3 textureColor = float3(0, 0, 0); // color sampled from a texture
+
+    ObjectType objType = input.getObjType();
+    const uint32_t currentMainObjectIdx = input.getMainObjectIdx();
+    const MainObject mainObj = loadMainObject(currentMainObjectIdx);
+
+    if (pc.isDTMRendering)
+    {
+        // TRIANGLE RENDERING
+        {
+            const float outlineThickness = input.getOutlineThickness();
+            const float contourThickness = input.getContourLineThickness();
+            const float phaseShift = 0.0f; // input.getCurrentPhaseShift();
+            const float stretch = 1.0f; // TODO: figure out what is it for ---> [ERFAN's REPLY: not relevant for DTMs, it's for special shape styles]
+            const float worldToScreenRatio = input.getCurrentWorldToScreenRatio();
+
+            DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx);
+            LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx);
+            LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx);
+
+            // per-vertex attributes: xy is used as a screen space position, z as the vertex height
+            float3 v[3];
+            v[0] = input.getScreenSpaceVertexAttribs(0);
+            v[1] = input.getScreenSpaceVertexAttribs(1);
+            v[2] = input.getScreenSpaceVertexAttribs(2);
+
+            const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR;
+
+            // indices of points constructing every edge
+            uint2 edgePoints[3];
+            edgePoints[0] = uint2(0, 1);
+            edgePoints[1] = uint2(1, 2);
+            edgePoints[2] = uint2(2, 0);
+
+            // index of vertex opposing an edge, needed for calculation of triangle heights
+            uint opposingVertexIdx[3];
+            opposingVertexIdx[0] = 2;
+            opposingVertexIdx[1] = 0;
+            opposingVertexIdx[2] = 1;
+
+            float height = input.getHeight();
+
+            // HEIGHT SHADING
+            const uint32_t heightMapSize = dtm.heightColorEntryCount;
+
+            if (heightMapSize > 0)
+            {
+                // Only index the height map once we know it is non-empty:
+                // `heightMapSize - 1` wraps around to 0xFFFFFFFF for an empty map.
+                const float minShadingHeight = dtm.heightColorMapHeights[0];
+                const float maxShadingHeight = dtm.heightColorMapHeights[heightMapSize - 1];
+
+                // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez
+                float2 e0 = v[1].xy - v[0].xy;
+                float2 e1 = v[2].xy - v[1].xy;
+                float2 e2 = v[0].xy - v[2].xy;
+
+                float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x);
+                float2 v0 = input.position.xy - v[0].xy;
+                float2 v1 = input.position.xy - v[1].xy;
+                float2 v2 = input.position.xy - v[2].xy;
+
+                // unsigned distance from the pixel to the (infinite) line through each edge
+                float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0)));
+                float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1)));
+                float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, e2) / dot(e2, e2)));
+
+                // NOTE(review): the referenced shadertoy applies only the *sign* of the 2D cross product; here the
+                // distance is scaled by the cross product itself. Kept as-is -- confirm whether sign() was intended.
+                float line0Sdf = distanceToLine0 * triangleAreaSign * (v0.x * e0.y - v0.y * e0.x);
+                float line1Sdf = distanceToLine1 * triangleAreaSign * (v1.x * e1.y - v1.y * e1.x);
+                float line2Sdf = distanceToLine2 * triangleAreaSign * (v2.x * e2.y - v2.y * e2.x);
+                // height bounds expressed in pixels through the screen space derivative of the height
+                float heightDeriv = fwidth(height);
+                float line3Sdf = (minShadingHeight - height) / heightDeriv;
+                float line4Sdf = (height - maxShadingHeight) / heightDeriv;
+
+                // intersection of the three edge half-planes and the [min, max] height band
+                float convexPolygonSdf = max(line0Sdf, line1Sdf);
+                convexPolygonSdf = max(convexPolygonSdf, line2Sdf);
+                convexPolygonSdf = max(convexPolygonSdf, line3Sdf);
+                convexPolygonSdf = max(convexPolygonSdf, line4Sdf);
+
+                localAlpha = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf);
+
+                // calculate height color
+                DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode();
+
+                if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS)
+                {
+                    DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm };
+                    uint32_t mapIndexPlus1 = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height);
+                    uint32_t mapIndex = mapIndexPlus1 == 0 ? mapIndexPlus1 : mapIndexPlus1 - 1;
+
+                    // the left operand short-circuits, so heightColorMapHeights[mapIndexPlus1] is only read while mapIndexPlus1 is a valid index
+                    bool blendWithPrev = (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[mapIndexPlus1] + dtm.heightColorMapHeights[mapIndex]));
+
+                    // logic explainer: if colorIdx is 0.0 then it means blend with next
+                    // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true
+                    // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values
+                    if (blendWithPrev)
+                    {
+                        if (mapIndex > 0)
+                        {
+                            float pxDistanceToPrevHeight = (height - dtm.heightColorMapHeights[mapIndex]) / heightDeriv;
+                            float prevColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToPrevHeight);
+                            textureColor = lerp(dtm.heightColorMapColors[mapIndex - 1].rgb, dtm.heightColorMapColors[mapIndex].rgb, prevColorCoverage);
+                        }
+                        else
+                        {
+                            textureColor = dtm.heightColorMapColors[mapIndex].rgb;
+                        }
+                    }
+                    else
+                    {
+                        if (mapIndex < heightMapSize - 1)
+                        {
+                            float pxDistanceToNextHeight = (height - dtm.heightColorMapHeights[mapIndexPlus1]) / heightDeriv;
+                            float nextColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNextHeight);
+                            textureColor = lerp(dtm.heightColorMapColors[mapIndex].rgb, dtm.heightColorMapColors[mapIndexPlus1].rgb, nextColorCoverage);
+                        }
+                        else
+                        {
+                            textureColor = dtm.heightColorMapColors[mapIndex].rgb;
+                        }
+                    }
+
+                    //localAlpha = dtm.heightColorMapColors[mapIndex].a;
+                }
+                else
+                {
+                    // defaults to the raw height so the value is never read uninitialized if `mode` matches neither branch below
+                    float heightTmp = height;
+                    if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS)
+                    {
+                        // snap the height down to the start of its fixed-width interval
+                        float interval = dtm.intervalWidth;
+                        int sectionIndex = int((height - minShadingHeight) / interval);
+                        heightTmp = minShadingHeight + float(sectionIndex) * interval;
+                    }
+                    else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS)
+                    {
+                        heightTmp = height;
+                    }
+
+                    DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm };
+                    uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height);
+                    // upper_bound yields heightMapSize when `height` is not below any entry; clamp so we never index past the last entry
+                    upperBoundHeightIndex = min(upperBoundHeightIndex, heightMapSize - 1);
+                    uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1;
+
+                    float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex];
+                    float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex];
+
+                    float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex];
+                    float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex];
+
+                    float interpolationVal;
+                    if (upperBoundHeightIndex == 0)
+                        interpolationVal = 1.0f; // lower == upper, avoid the 0/0 below
+                    else
+                        interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight);
+
+                    textureColor = lerp(lowerBoundColor.rgb, upperBoundColor.rgb, interpolationVal);
+                    localAlpha = lerp(lowerBoundColor.a, upperBoundColor.a, interpolationVal);
+                }
+            }
+            //else // TODO: remove!!
+            //{
+            //    printf("WTF");
+            //    return float4(0.0f, 0.0f, 0.0f, 1.0f);
+            //}
+
+            // CONTOUR
+
+            // TODO: move to ubo or push constants
+            const float startHeight = dtm.contourLinesStartHeight;
+            const float endHeight = dtm.contourLinesEndHeight;
+            const float interval = dtm.contourLinesHeightInterval;
+
+            // TODO: can be precomputed
+            const int maxContourLineIdx = (endHeight - startHeight + 1) / interval;
+
+            // index of the contour line closest to this pixel's height; the raw value can be negative, hence the clamp
+            int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight + (interval * 0.5f)) / interval);
+            contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx);
+            float contourLineHeight = startHeight + interval * contourLineIdx;
+
+            int contourLinePointsIdx = 0;
+            float2 contourLinePoints[2];
+            // TODO: case where heights we are looking for are on all three vertices
+            for (int i = 0; i < 3; ++i)
+            {
+                // contourLinePoints holds exactly two points; stop before a third edge could write out of bounds
+                if (contourLinePointsIdx >= 2)
+                    break;
+
+                const uint2 currentEdgePoints = edgePoints[i];
+                float3 p0 = v[currentEdgePoints[0]];
+                float3 p1 = v[currentEdgePoints[1]];
+
+                // order the edge end points by height
+                if (p1.z < p0.z)
+                    nbl::hlsl::swap(p0, p1);
+
+                float minHeight = p0.z;
+                float maxHeight = p1.z;
+
+                if (height >= minHeight && height <= maxHeight)
+                {
+                    float2 edge = float2(p1.x, p1.y) - float2(p0.x, p0.y);
+                    // NOTE(review): divides by zero when both end points of the edge have the same height (see TODO above)
+                    float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight);
+
+                    contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y);
+                    ++contourLinePointsIdx;
+                }
+            }
+
+            {
+                nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]);
+
+                float distance = nbl::hlsl::numeric_limits::max;
+                if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue)
+                {
+                    distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag);
+                }
+                else
+                {
+                    // TODO:
+                    // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations!
+                    // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor
+                    nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment);
+                    LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio);
+                    distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper);
+                }
+
+                // composite the contour line over the height shading
+                float contourLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a;
+                textureColor = lerp(textureColor, contourStyle.color.rgb, contourLocalAlpha);
+                localAlpha = max(localAlpha, contourLocalAlpha);
+            }
+
+
+
+            // OUTLINE
+
+            // find sdf of every edge
+            float triangleAreaTimesTwo;
+            {
+                float3 AB = v[0] - v[1];
+                float3 AC = v[0] - v[2];
+                // z carries the height, only the screen space xy may contribute to the area
+                AB.z = 0.0f;
+                AC.z = 0.0f;
+
+                // TODO: figure out if there is a faster solution
+                triangleAreaTimesTwo = length(cross(AB, AC));
+            }
+
+            // calculate sdf of every edge as it wasn't stippled
+            // distance(pixel, edge) = 2 * area / |edge| * barycentric weight of the vertex opposing that edge
+            float distances[3];
+            for (int i = 0; i < 3; ++i)
+            {
+                const uint2 currentEdgePoints = edgePoints[i];
+                float3 A = v[currentEdgePoints[0]];
+                float3 B = v[currentEdgePoints[1]];
+                // the edge length has to be measured in screen space (xy) to match the area above, which is computed
+                // with z zeroed; including the height difference (z) would shrink the distances of sloped edges
+                float ABLen = length(B.xy - A.xy);
+
+                distances[i] = (triangleAreaTimesTwo / ABLen) * baryCoord[opposingVertexIdx[i]];
+            }
+
+            float minDistance = nbl::hlsl::numeric_limits::max;
+            if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue)
+            {
+                for (uint i = 0; i < 3; ++i)
+                    distances[i] -= outlineThickness;
+
+                minDistance = min(distances[0], min(distances[1], distances[2]));
+            }
+            else
+            {
+                for (int i = 0; i < 3; ++i)
+                {
+                    // pixels farther than the outline thickness from this edge cannot be covered by it, skip the stipple sdf
+                    if (distances[i] > outlineThickness)
+                        continue;
+
+                    const uint2 currentEdgePoints = edgePoints[i];
+                    float3 p0 = v[currentEdgePoints[0]];
+                    float3 p1 = v[currentEdgePoints[1]];
+
+                    // long story short, in order for stipple patterns to be consistent:
+                    // - point with lesser x coord should be starting point
+                    // - if x coord of both points are equal then point with lesser y value should be starting point
+                    if (p1.x < p0.x)
+                        nbl::hlsl::swap(p0, p1);
+                    else if (p1.x == p0.x && p1.y < p0.y)
+                        nbl::hlsl::swap(p0, p1);
+
+                    nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y));
+
+                    float distance = nbl::hlsl::numeric_limits::max;
+                    nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment);
+                    LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio);
+                    distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag, clipper);
+
+                    minDistance = min(minDistance, distance);
+                }
+
+            }
+
+            // composite the outline over height shading + contour
+            float outlineLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, minDistance) * outlineStyle.color.a;
+            textureColor = lerp(textureColor, outlineStyle.color.rgb, outlineLocalAlpha);
+            localAlpha = max(localAlpha, outlineLocalAlpha);
+        }
+
+        return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true);
+    }
+    else
+    {
+        // figure out local alpha with sdf
+        if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR)
+        {
+            float distance = nbl::hlsl::numeric_limits::max;
+            if (objType == ObjectType::LINE)
+            {
+                const float2 start = input.getLineStart();
+                const float2 end = input.getLineEnd();
+                const uint32_t styleIdx = mainObj.styleIdx;
+                const float thickness = input.getLineThickness();
+                const float phaseShift = input.getCurrentPhaseShift();
+                const float stretch = input.getPatternStretch();
+                const float worldToScreenRatio = input.getCurrentWorldToScreenRatio();
+
+                nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end);
+
+                LineStyle style = loadLineStyle(styleIdx);
+
+                if (!style.hasStipples() || stretch == InvalidStyleStretchValue)
+                {
+                    distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag);
+                }
+                else
+                {
+                    nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment);
+                    // reuse the style loaded above instead of loading it a second time
+                    LineStyleClipper clipper = LineStyleClipper::construct(style, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio);
+                    distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper);
+                }
+            }
+            else if (objType == ObjectType::QUAD_BEZIER)
+            {
+                nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic();
+                nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator();
+
+                const uint32_t styleIdx = mainObj.styleIdx;
+                const float thickness = input.getLineThickness();
+                const float phaseShift = input.getCurrentPhaseShift();
+                const float stretch = input.getPatternStretch();
+                const float worldToScreenRatio = input.getCurrentWorldToScreenRatio();
+
+                LineStyle style = loadLineStyle(styleIdx);
+                if (!style.hasStipples() || stretch == InvalidStyleStretchValue)
+                {
+                    distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag);
+                }
+                else
+                {
+                    // reuse the style loaded above instead of loading it a second time
+                    BezierStyleClipper clipper = BezierStyleClipper::construct(style, quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio);
+                    distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper);
+                }
+            }
+            else if (objType == ObjectType::POLYLINE_CONNECTOR)
+            {
+                const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter();
+                distance = miterSDF(
+                    P,
+                    input.getLineThickness(),
+                    input.getPolylineConnectorTrapezoidStart(),
+                    input.getPolylineConnectorTrapezoidEnd(),
+                    input.getPolylineConnectorTrapezoidLongBase(),
+                    input.getPolylineConnectorTrapezoidShortBase());
+
+            }
+            localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance);
+        }
+        else if (objType == ObjectType::CURVE_BOX)
+        {
+            const float minorBBoxUV = input.getMinorBBoxUV();
+            const float majorBBoxUV = input.getMajorBBoxUV();
+
+            nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor();
+            nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor();
+            nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor();
+            nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor();
+
+            // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel.
+            nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0));
+            nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0));
+
+            // solve each bounding curve for the parameter at this pixel's major coordinate, then evaluate its minor coordinate there
+            const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0);
+            const float minEv = curveMinMinor.evaluate(minT);
+
+            const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0);
+            const float maxEv = curveMaxMinor.evaluate(maxT);
+
+            const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0;
+            const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv;
+
+            if (insideMinor && insideMajor)
+            {
+                localAlpha = 1.0;
+            }
+            else
+            {
+                // Find the true SDF of a hatch box boundary which is bounded by two curves, It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction)
+                // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor
+                const float InvalidT = nbl::hlsl::numeric_limits::max;
+                const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max;
+
+                const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize();
+
+
+                float closestDistanceSquared = MAX_DISTANCE_SQUARED;
+                const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize;
+
+                if (minorBBoxUV < minEv)
+                {
+                    // DO SDF of Min Curve
+                    nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct(
+                        float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize,
+                        float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize,
+                        float2(curveMinMinor.c, curveMinMajor.c) * boxScreenSpaceSize);
+
+                    nbl::hlsl::shapes::Quadratic::Candidates candidates = minCurve.getClosestCandidates(pos);
+                    [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]]
+                    for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++)
+                    {
+                        candidates[i] = clamp(candidates[i], 0.0, 1.0);
+                        const float2 distVector = minCurve.evaluate(candidates[i]) - pos;
+                        const float candidateDistanceSquared = dot(distVector, distVector);
+                        if (candidateDistanceSquared < closestDistanceSquared)
+                            closestDistanceSquared = candidateDistanceSquared;
+                    }
+                }
+                else if (minorBBoxUV > maxEv)
+                {
+                    // Do SDF of Max Curve
+                    nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct(
+                        float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize,
+                        float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize,
+                        float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize);
+                    nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos);
+                    [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]]
+                    for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++)
+                    {
+                        candidates[i] = clamp(candidates[i], 0.0, 1.0);
+                        const float2 distVector = maxCurve.evaluate(candidates[i]) - pos;
+                        const float candidateDistanceSquared = dot(distVector, distVector);
+                        if (candidateDistanceSquared < closestDistanceSquared)
+                            closestDistanceSquared = candidateDistanceSquared;
+                    }
+                }
+
+                if (!insideMajor)
+                {
+                    // distance to the straight boundary that closes the box at major == 0 or major == 1
+                    const bool minLessThanMax = minEv < maxEv;
+                    float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED);
+                    if (majorBBoxUV > 1.0)
+                    {
+                        const float2 minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize;
+                        if (minLessThanMax)
+                            majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize);
+                        else
+                            majorDistVector = pos - minCurveEnd;
+                    }
+                    else
+                    {
+                        const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize;
+                        if (minLessThanMax)
+                            majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize);
+                        else
+                            majorDistVector = pos - minCurveStart;
+                    }
+
+                    const float majorDistSq = dot(majorDistVector, majorDistVector);
+                    if (majorDistSq < closestDistanceSquared)
+                        closestDistanceSquared = majorDistSq;
+                }
+
+                const float dist = sqrt(closestDistanceSquared);
+                localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist);
+            }
+
+            LineStyle style = loadLineStyle(mainObj.styleIdx);
+            // for hatches the bits of screenSpaceLineWidth are reinterpreted as the fill pattern texture id
+            uint32_t textureId = asuint(style.screenSpaceLineWidth);
+            if (textureId != InvalidTextureIdx)
+            {
+                // For hatch fills we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected
+                // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH
+                float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz;
+                float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize);
+                localAlpha *= smoothstep(+globals.antiAliasingFactor / 2.0, -globals.antiAliasingFactor / 2.0f, msdf);
+            }
+        }
+        else if (objType == ObjectType::FONT_GLYPH)
+        {
+            const float2 uv = input.getFontGlyphUV();
+            const uint32_t textureId = input.getFontGlyphTextureId();
+
+            if (textureId != InvalidTextureIdx)
+            {
+                float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv);
+                float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel);
+                float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange());
+                /*
+                    explaining "*= exp2(max(mipLevel,0.0))"
+                    Each mip level has constant MSDFPixelRange
+                    Which essentially makes the msdfSamples here (Hardware Sampled) have different scales per mip
+                    As we go up 1 mip level, the msdf distance should be multiplied by 2.0
+                    While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled.
+                    It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice!
+
+                    Alternatively you can think of it as doing this instead:
+                    localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf);
+                    Which is reducing the aa feathering as we go up the mip levels.
+                    to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph
+                */
+                msdf *= exp2(max(mipLevel,0.0));
+
+                LineStyle style = loadLineStyle(mainObj.styleIdx);
+                const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf;
+                const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle
+                localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf);
+            }
+        }
+        else if (objType == ObjectType::IMAGE)
+        {
+            const float2 uv = input.getImageUV();
+            const uint32_t textureId = input.getImageTextureId();
+
+            if (textureId != InvalidTextureIdx)
+            {
+                float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y));
+                textureColor = colorSample.rgb;
+                localAlpha = colorSample.a;
+            }
+        }
+
+        uint2 fragCoord = uint2(input.position.xy);
+
+        // nothing to contribute for fully transparent fragments
+        if (localAlpha <= 0)
+            discard;
+
+        // only images carry their own color; every other object type takes its color from the style
+        const bool colorFromTexture = objType == ObjectType::IMAGE;
+
+        // TODO[Przemek]: But make sure you're still calling this, correctly calculating alpha and texture color.
+        // you can add 1 main object and push via DrawResourcesFiller like we already do for other objects (this go in the mainObjects StorageBuffer) and then set the currentMainObjectIdx to 0 here
+        // having 1 main object temporarily means that all triangle meshes will be treated as a unified object in blending operations.
+        return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture);
+    }
+}
diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 4a955d92d..b300a6958 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -7,8 +7,6 @@ #include #include -[[vk::push_constant]] PushConstants pc; - // TODO[Lucas]: Move these functions to builtin hlsl functions (Even the shadertoy obb and aabb ones) float cross2D(float2 a, float2 b) { @@ -85,73 +83,9 @@ PSInput main(uint vertexID : SV_VertexID) // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above.
-#define DTM -#ifdef DTM - PSInput outV; - // Default Initialize PS Input - outV.position.zw = float2(0.0, 1.0); - outV.data1 = uint4(0, 0, 0, 0); - outV.data2 = float4(0, 0, 0, 0); - outV.data3 = float4(0, 0, 0, 0); - outV.data4 = float4(0, 0, 0, 0); - outV.interp_data5 = float2(0, 0); - outV.setObjType(ObjectType::TRIANGLE_MESH); - outV.setMainObjectIdx(pc.triangleMeshMainObjectIndex); - - TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); - pfloat64_t2 vtxPos; - vtxPos.x = _static_cast(vtx.pos.x); - vtxPos.y = _static_cast(vtx.pos.y); - - MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); - ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); - - float2 transformedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos); - - outV.position.xy = transformedPos; - outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); - const float heightAsFloat = nbl::hlsl::_static_cast(vtx.height); - outV.setHeight(heightAsFloat); - outV.setScreenSpaceVertexAttribs(float3(transformedPos, heightAsFloat)); - outV.setCurrentWorldToScreenRatio( - _static_cast((_static_cast(2.0f) / - (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) - ); - - // TODO: line style of contour line has to be set too! 
- DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); - LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); - LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); - const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; - const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float sdfContourLineThickness = screenSpaceContourLineWidth * 0.5f; - outV.setOutlineThickness(sdfOutlineThickness); - outV.setContourLineThickness(sdfContourLineThickness); - - // full screen triangle (this will destroy outline, contour line and height drawing) -#if 0 - const uint vertexIdx = vertexID % 3; - if(vertexIdx == 0) - outV.position.xy = float2(-1.0f, -1.0f); - else if (vertexIdx == 1) - outV.position.xy = float2(-1.0f, 3.0f); - else if (vertexIdx == 2) - outV.position.xy = float2(3.0f, -1.0f); -#endif + ClipProjectionData clipProjectionData; - return outV; - -#else - - const uint vertexIdx = vertexID & 0x3u; - const uint objectID = vertexID >> 2; - - DrawObject drawObj = loadDrawObject(objectID); - - ObjectType objType = (ObjectType)(drawObj.type_subsectionIdx & 0x0000FFFF); - uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; PSInput outV; // Default Initialize PS Input @@ -161,487 +95,542 @@ PSInput main(uint vertexID : SV_VertexID) outV.data3 = float4(0, 0, 0, 0); outV.data4 = float4(0, 0, 0, 0); outV.interp_data5 = float2(0, 0); - outV.setObjType(objType); - outV.setMainObjectIdx(drawObj.mainObjIndex); - - MainObject mainObj = loadMainObject(drawObj.mainObjIndex); - ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); - - // We only need these for Outline type objects like lines and bezier curves - if (objType == 
ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) + if (pc.isDTMRendering) { - LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); + outV.setObjType(ObjectType::TRIANGLE_MESH); + outV.setMainObjectIdx(pc.triangleMeshMainObjectIndex); + + TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); + pfloat64_t2 vtxPos; + vtxPos.x = _static_cast(vtx.pos.x); + vtxPos.y = _static_cast(vtx.pos.y); + + MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); + clipProjectionData = getClipProjectionData(mainObj); - // Width is on both sides, thickness is one one side of the curve (div by 2.0f) - const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + _static_cast(_static_cast(lineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; - const float sdfLineThickness = screenSpaceLineWidth / 2.0f; - outV.setLineThickness(sdfLineThickness); + float2 transformedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos); + + outV.position.xy = transformedPos; + outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); + const float heightAsFloat = nbl::hlsl::_static_cast(vtx.height); + outV.setHeight(heightAsFloat); + outV.setScreenSpaceVertexAttribs(float3(transformedPos, heightAsFloat)); outV.setCurrentWorldToScreenRatio( _static_cast((_static_cast(2.0f) / - (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) + (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) ); - if (objType == ObjectType::LINE) - { - pfloat64_t2 points[2u]; - points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 
sizeof(LinePointInfo), 8u); - - const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float), 8u); - outV.setCurrentPhaseShift(phaseShift); - outV.setPatternStretch(patternStretch); - - float2 transformedPoints[2u]; - for (uint i = 0u; i < 2u; ++i) - { - transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); - } + // TODO: line style of contour line has to be set too! + DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); + LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); + LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); + const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; + const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float sdfContourLineThickness = screenSpaceContourLineWidth * 0.5f; + outV.setOutlineThickness(sdfOutlineThickness); + outV.setContourLineThickness(sdfContourLineThickness); + + // full screen triangle (this will destroy outline, contour line and height drawing) +#if 0 + const uint vertexIdx = vertexID % 3; + if(vertexIdx == 0) + outV.position.xy = float2(-1.0f, -1.0f); + else if (vertexIdx == 1) + outV.position.xy = float2(-1.0f, 3.0f); + else if (vertexIdx == 2) + outV.position.xy = float2(3.0f, -1.0f); +#endif + } + else + { + const uint vertexIdx = vertexID & 0x3u; + const uint objectID = vertexID >> 2; - const float2 lineVector = normalize(transformedPoints[1u] - transformedPoints[0u]); - const float2 normalToLine = 
float2(-lineVector.y, lineVector.x); + DrawObject drawObj = loadDrawObject(objectID); - if (vertexIdx == 0u || vertexIdx == 1u) - { - // work in screen space coordinates because of fixed pixel size - outV.position.xy = transformedPoints[0u] - + normalToLine * (((float)vertexIdx - 0.5f) * 2.0f * antiAliasedLineThickness) - - lineVector * antiAliasedLineThickness; - } - else // if (vertexIdx == 2u || vertexIdx == 3u) - { - // work in screen space coordinates because of fixed pixel size - outV.position.xy = transformedPoints[1u] - + normalToLine * (((float)vertexIdx - 2.5f) * 2.0f * antiAliasedLineThickness) - + lineVector * antiAliasedLineThickness; - } - - outV.setLineStart(transformedPoints[0u]); - outV.setLineEnd(transformedPoints[1u]); + ObjectType objType = (ObjectType)(drawObj.type_subsectionIdx & 0x0000FFFF); + uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; + outV.setObjType(objType); + outV.setMainObjectIdx(drawObj.mainObjIndex); + - outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; - } - else if (objType == ObjectType::QUAD_BEZIER) + MainObject mainObj = loadMainObject(drawObj.mainObjIndex); + clipProjectionData = getClipProjectionData(mainObj); + + // We only need these for Outline type objects like lines and bezier curves + if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) { - pfloat64_t2 points[3u]; - points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - points[2u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); - - const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); - const float patternStretch = 
vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); - outV.setCurrentPhaseShift(phaseShift); - outV.setPatternStretch(patternStretch); - - // transform these points into screen space and pass to fragment - float2 transformedPoints[3u]; - for (uint i = 0u; i < 3u; ++i) + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); + + // Width is on both sides, thickness is one one side of the curve (div by 2.0f) + const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + _static_cast(_static_cast(lineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; + const float sdfLineThickness = screenSpaceLineWidth / 2.0f; + outV.setLineThickness(sdfLineThickness); + outV.setCurrentWorldToScreenRatio( + _static_cast((_static_cast(2.0f) / + (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) + ); + + if (objType == ObjectType::LINE) { - transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); - } + pfloat64_t2 points[2u]; + points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(LinePointInfo), 8u); - shapes::QuadraticBezier quadraticBezier = shapes::QuadraticBezier::construct(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u]); - shapes::Quadratic quadratic = shapes::Quadratic::constructFromBezier(quadraticBezier); - shapes::Quadratic::ArcLengthCalculator preCompData = shapes::Quadratic::ArcLengthCalculator::construct(quadratic); - - outV.setQuadratic(quadratic); - outV.setQuadraticPrecomputedArcLenData(preCompData); - - float2 Mid = (transformedPoints[0u] + transformedPoints[2u]) / 2.0f; - float Radius = length(Mid - transformedPoints[0u]) / 2.0f; - - // 
https://algorithmist.wordpress.com/2010/12/01/quad-bezier-curvature/ - float2 vectorAB = transformedPoints[1u] - transformedPoints[0u]; - float2 vectorAC = transformedPoints[2u] - transformedPoints[1u]; - float area = abs(vectorAB.x * vectorAC.y - vectorAB.y * vectorAC.x) * 0.5; - float MaxCurvature; - if (length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.25f)) > Radius && length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.75f)) > Radius) - MaxCurvature = pow(length(transformedPoints[1u] - Mid), 3) / (area * area); - else - MaxCurvature = max(area / pow(length(transformedPoints[0u] - transformedPoints[1u]), 3), area / pow(length(transformedPoints[2u] - transformedPoints[1u]), 3)); - - // We only do this adaptive thing when "MinRadiusOfOsculatingCircle = RadiusOfMaxCurvature < screenSpaceLineWidth/4" OR "MaxCurvature > 4/screenSpaceLineWidth"; - // which means there is a self intersection because of large lineWidth relative to the curvature (in screenspace) - // the reason for division by 4.0f is 1. screenSpaceLineWidth is expanded on both sides and 2. 
the fact that diameter/2=radius, - const bool noCurvature = abs(dot(normalize(vectorAB), normalize(vectorAC)) - 1.0f) < exp2(-10.0f); - if (MaxCurvature * screenSpaceLineWidth > 4.0f || noCurvature) - { - //OBB Fallback - float2 obbV0; - float2 obbV1; - float2 obbV2; - float2 obbV3; - quadraticBezier.computeOBB(antiAliasedLineThickness, obbV0, obbV1, obbV2, obbV3); - if (subsectionIdx == 0) + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float), 8u); + outV.setCurrentPhaseShift(phaseShift); + outV.setPatternStretch(patternStretch); + + float2 transformedPoints[2u]; + for (uint i = 0u; i < 2u; ++i) { - if (vertexIdx == 0u) - outV.position = float4(obbV0, 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(obbV1, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(obbV3, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(obbV2, 0.0, 1.0f); + transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); } - else - outV.position = float4(0.0f, 0.0f, 0.0f, 0.0f); - } - else - { - // this optimal value is hardcoded based on tests and benchmarks of pixel shader invocation - // this is the place where we use it's tangent in the bezier to form sides the cages - const float optimalT = 0.145f; - - // Whether or not to flip the the interior cage nodes - int flip = cross2D(transformedPoints[0u] - transformedPoints[1u], transformedPoints[2u] - transformedPoints[1u]) > 0.0f ? 
-1 : 1; - const float middleT = 0.5f; - float2 midPos = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT); - float2 midTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT)); - float2 midNormal = float2(-midTangent.y, midTangent.x) * flip; - - /* - P1 - + + const float2 lineVector = normalize(transformedPoints[1u] - transformedPoints[0u]); + const float2 normalToLine = float2(-lineVector.y, lineVector.x); + if (vertexIdx == 0u || vertexIdx == 1u) + { + // work in screen space coordinates because of fixed pixel size + outV.position.xy = transformedPoints[0u] + + normalToLine * (((float)vertexIdx - 0.5f) * 2.0f * antiAliasedLineThickness) + - lineVector * antiAliasedLineThickness; + } + else // if (vertexIdx == 2u || vertexIdx == 3u) + { + // work in screen space coordinates because of fixed pixel size + outV.position.xy = transformedPoints[1u] + + normalToLine * (((float)vertexIdx - 2.5f) * 2.0f * antiAliasedLineThickness) + + lineVector * antiAliasedLineThickness; + } - exterior0 exterior1 - ---------------------- - / \- - -/ ---------------- \ - / -/interior0 interior1 - / / \ \- - -/ -/ \- \ - / -/ \ \- - / / \- \ - P0 + \ + P2 - */ + outV.setLineStart(transformedPoints[0u]); + outV.setLineEnd(transformedPoints[1u]); - // Internal cage points - float2 interior0; - float2 interior1; + outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; + } + else if (objType == ObjectType::QUAD_BEZIER) + { + pfloat64_t2 points[3u]; + points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + points[2u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); + + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer 
+ drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); + outV.setCurrentPhaseShift(phaseShift); + outV.setPatternStretch(patternStretch); + + // transform these points into screen space and pass to fragment + float2 transformedPoints[3u]; + for (uint i = 0u; i < 3u; ++i) + { + transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); + } - float2 middleExteriorPoint = midPos - midNormal * antiAliasedLineThickness; + shapes::QuadraticBezier quadraticBezier = shapes::QuadraticBezier::construct(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u]); + shapes::Quadratic quadratic = shapes::Quadratic::constructFromBezier(quadraticBezier); + shapes::Quadratic::ArcLengthCalculator preCompData = shapes::Quadratic::ArcLengthCalculator::construct(quadratic); + outV.setQuadratic(quadratic); + outV.setQuadraticPrecomputedArcLenData(preCompData); - float2 leftTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT)); - float2 leftNormal = normalize(float2(-leftTangent.y, leftTangent.x)) * flip; - float2 leftExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT) - leftNormal * antiAliasedLineThickness; - float2 exterior0 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, leftExteriorPoint, leftTangent); + float2 Mid = (transformedPoints[0u] + transformedPoints[2u]) / 2.0f; + float Radius = length(Mid - transformedPoints[0u]) / 2.0f; - float2 rightTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT)); - float2 rightNormal = normalize(float2(-rightTangent.y, rightTangent.x)) * flip; - float2 rightExteriorPoint = QuadraticBezier(transformedPoints[0u], 
transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT) - rightNormal * antiAliasedLineThickness; - float2 exterior1 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, rightExteriorPoint, rightTangent); + // https://algorithmist.wordpress.com/2010/12/01/quad-bezier-curvature/ + float2 vectorAB = transformedPoints[1u] - transformedPoints[0u]; + float2 vectorAC = transformedPoints[2u] - transformedPoints[1u]; + float area = abs(vectorAB.x * vectorAC.y - vectorAB.y * vectorAC.x) * 0.5; + float MaxCurvature; + if (length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.25f)) > Radius && length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.75f)) > Radius) + MaxCurvature = pow(length(transformedPoints[1u] - Mid), 3) / (area * area); + else + MaxCurvature = max(area / pow(length(transformedPoints[0u] - transformedPoints[1u]), 3), area / pow(length(transformedPoints[2u] - transformedPoints[1u]), 3)); - // Interiors + // We only do this adaptive thing when "MinRadiusOfOsculatingCircle = RadiusOfMaxCurvature < screenSpaceLineWidth/4" OR "MaxCurvature > 4/screenSpaceLineWidth"; + // which means there is a self intersection because of large lineWidth relative to the curvature (in screenspace) + // the reason for division by 4.0f is 1. screenSpaceLineWidth is expanded on both sides and 2. 
the fact that diameter/2=radius, + const bool noCurvature = abs(dot(normalize(vectorAB), normalize(vectorAC)) - 1.0f) < exp2(-10.0f); + if (MaxCurvature * screenSpaceLineWidth > 4.0f || noCurvature) { - float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286f)); - float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; - interior0 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286) + normal * antiAliasedLineThickness; + //OBB Fallback + float2 obbV0; + float2 obbV1; + float2 obbV2; + float2 obbV3; + quadraticBezier.computeOBB(antiAliasedLineThickness, obbV0, obbV1, obbV2, obbV3); + if (subsectionIdx == 0) + { + if (vertexIdx == 0u) + outV.position = float4(obbV0, 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(obbV1, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(obbV3, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(obbV2, 0.0, 1.0f); + } + else + outV.position = float4(0.0f, 0.0f, 0.0f, 0.0f); } + else { - float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f)); - float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; - interior1 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f) + normal * antiAliasedLineThickness; + // this optimal value is hardcoded based on tests and benchmarks of pixel shader invocation + // this is the place where we use it's tangent in the bezier to form sides the cages + const float optimalT = 0.145f; + + // Whether or not to flip the the interior cage nodes + int flip = cross2D(transformedPoints[0u] - transformedPoints[1u], transformedPoints[2u] - transformedPoints[1u]) > 0.0f ? 
-1 : 1; + + const float middleT = 0.5f; + float2 midPos = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT); + float2 midTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT)); + float2 midNormal = float2(-midTangent.y, midTangent.x) * flip; + + /* + P1 + + + + + exterior0 exterior1 + ---------------------- + / \- + -/ ---------------- \ + / -/interior0 interior1 + / / \ \- + -/ -/ \- \ + / -/ \ \- + / / \- \ + P0 + \ + P2 + */ + + // Internal cage points + float2 interior0; + float2 interior1; + + float2 middleExteriorPoint = midPos - midNormal * antiAliasedLineThickness; + + + float2 leftTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT)); + float2 leftNormal = normalize(float2(-leftTangent.y, leftTangent.x)) * flip; + float2 leftExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT) - leftNormal * antiAliasedLineThickness; + float2 exterior0 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, leftExteriorPoint, leftTangent); + + float2 rightTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT)); + float2 rightNormal = normalize(float2(-rightTangent.y, rightTangent.x)) * flip; + float2 rightExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT) - rightNormal * antiAliasedLineThickness; + float2 exterior1 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, rightExteriorPoint, rightTangent); + + // Interiors + { + float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286f)); + float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; + interior0 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], 
transformedPoints[2u], 0.286) + normal * antiAliasedLineThickness; + } + { + float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f)); + float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; + interior1 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f) + normal * antiAliasedLineThickness; + } + + if (subsectionIdx == 0u) + { + float2 endPointTangent = normalize(transformedPoints[1u] - transformedPoints[0u]); + float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; + float2 endPointExterior = transformedPoints[0u] - endPointTangent * antiAliasedLineThickness; + + if (vertexIdx == 0u) + outV.position = float4(shapes::util::LineLineIntersection(leftExteriorPoint, leftTangent, endPointExterior, endPointNormal), 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(transformedPoints[0u] + endPointNormal * antiAliasedLineThickness - endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior0, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior0, 0.0, 1.0f); + } + else if (subsectionIdx == 1u) + { + if (vertexIdx == 0u) + outV.position = float4(exterior0, 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(interior0, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior1, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior1, 0.0, 1.0f); + } + else if (subsectionIdx == 2u) + { + float2 endPointTangent = normalize(transformedPoints[2u] - transformedPoints[1u]); + float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; + float2 endPointExterior = transformedPoints[2u] + endPointTangent * antiAliasedLineThickness; + + if (vertexIdx == 0u) + outV.position = float4(shapes::util::LineLineIntersection(rightExteriorPoint, rightTangent, endPointExterior, endPointNormal), 0.0, 
1.0f); + else if (vertexIdx == 1u) + outV.position = float4(transformedPoints[2u] + endPointNormal * antiAliasedLineThickness + endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior1, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior1, 0.0, 1.0f); + } } - if (subsectionIdx == 0u) - { - float2 endPointTangent = normalize(transformedPoints[1u] - transformedPoints[0u]); - float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; - float2 endPointExterior = transformedPoints[0u] - endPointTangent * antiAliasedLineThickness; + outV.position.xy = (outV.position.xy / globals.resolution) * 2.0f - 1.0f; + } + else if (objType == ObjectType::POLYLINE_CONNECTOR) + { + const float FLOAT_INF = numeric_limits::infinity; + const float4 INVALID_VERTEX = float4(FLOAT_INF, FLOAT_INF, FLOAT_INF, FLOAT_INF); - if (vertexIdx == 0u) - outV.position = float4(shapes::util::LineLineIntersection(leftExteriorPoint, leftTangent, endPointExterior, endPointNormal), 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(transformedPoints[0u] + endPointNormal * antiAliasedLineThickness - endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior0, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior0, 0.0, 1.0f); - } - else if (subsectionIdx == 1u) - { - if (vertexIdx == 0u) - outV.position = float4(exterior0, 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(interior0, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior1, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior1, 0.0, 1.0f); - } - else if (subsectionIdx == 2u) + if (lineStyle.isRoadStyleFlag) { - float2 endPointTangent = normalize(transformedPoints[2u] - transformedPoints[1u]); - float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; - float2 endPointExterior 
= transformedPoints[2u] + endPointTangent * antiAliasedLineThickness; + const pfloat64_t2 circleCenter = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + const float2 v = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float cosHalfAngleBetweenNormals = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); - if (vertexIdx == 0u) - outV.position = float4(shapes::util::LineLineIntersection(rightExteriorPoint, rightTangent, endPointExterior, endPointNormal), 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(transformedPoints[2u] + endPointNormal * antiAliasedLineThickness + endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior1, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior1, 0.0, 1.0f); - } - } + const float2 circleCenterScreenSpace = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, circleCenter); + outV.setPolylineConnectorCircleCenter(circleCenterScreenSpace); - outV.position.xy = (outV.position.xy / globals.resolution) * 2.0f - 1.0f; - } - else if (objType == ObjectType::POLYLINE_CONNECTOR) - { - const float FLOAT_INF = numeric_limits::infinity; - const float4 INVALID_VERTEX = float4(FLOAT_INF, FLOAT_INF, FLOAT_INF, FLOAT_INF); + // Find other miter vertices + const float sinHalfAngleBetweenNormals = sqrt(1.0f - (cosHalfAngleBetweenNormals * cosHalfAngleBetweenNormals)); + const float32_t2x2 rotationMatrix = float32_t2x2(cosHalfAngleBetweenNormals, -sinHalfAngleBetweenNormals, sinHalfAngleBetweenNormals, cosHalfAngleBetweenNormals); - if (lineStyle.isRoadStyleFlag) - { - const pfloat64_t2 circleCenter = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - const float2 v = vk::RawBufferLoad(globals.pointers.geometryBuffer + 
drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float cosHalfAngleBetweenNormals = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); + // Pass the precomputed trapezoid values for the sdf + { + float vLen = length(v); + float2 intersectionDirection = v / vLen; - const float2 circleCenterScreenSpace = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, circleCenter); - outV.setPolylineConnectorCircleCenter(circleCenterScreenSpace); + float longBase = sinHalfAngleBetweenNormals; + float shortBase = max((vLen - globals.miterLimit) * cosHalfAngleBetweenNormals / sinHalfAngleBetweenNormals, 0.0); + // height of the trapezoid / triangle + float hLen = min(globals.miterLimit, vLen); - // Find other miter vertices - const float sinHalfAngleBetweenNormals = sqrt(1.0f - (cosHalfAngleBetweenNormals * cosHalfAngleBetweenNormals)); - const float32_t2x2 rotationMatrix = float32_t2x2(cosHalfAngleBetweenNormals, -sinHalfAngleBetweenNormals, sinHalfAngleBetweenNormals, cosHalfAngleBetweenNormals); + outV.setPolylineConnectorTrapezoidStart(-1.0 * intersectionDirection * sdfLineThickness); + outV.setPolylineConnectorTrapezoidEnd(intersectionDirection * hLen * sdfLineThickness); + outV.setPolylineConnectorTrapezoidLongBase(sinHalfAngleBetweenNormals * ((1.0 + vLen) / (vLen - cosHalfAngleBetweenNormals)) * sdfLineThickness); + outV.setPolylineConnectorTrapezoidShortBase(shortBase * sdfLineThickness); + } - // Pass the precomputed trapezoid values for the sdf - { - float vLen = length(v); - float2 intersectionDirection = v / vLen; - - float longBase = sinHalfAngleBetweenNormals; - float shortBase = max((vLen - globals.miterLimit) * cosHalfAngleBetweenNormals / sinHalfAngleBetweenNormals, 0.0); - // height of the trapezoid / triangle - float hLen = min(globals.miterLimit, vLen); - - outV.setPolylineConnectorTrapezoidStart(-1.0 * intersectionDirection * sdfLineThickness); - 
outV.setPolylineConnectorTrapezoidEnd(intersectionDirection * hLen * sdfLineThickness); - outV.setPolylineConnectorTrapezoidLongBase(sinHalfAngleBetweenNormals * ((1.0 + vLen) / (vLen - cosHalfAngleBetweenNormals)) * sdfLineThickness); - outV.setPolylineConnectorTrapezoidShortBase(shortBase * sdfLineThickness); - } + if (vertexIdx == 0u) + { + const float2 V1 = normalize(mul(v, rotationMatrix)) * antiAliasedLineThickness * 2.0f; + const float2 screenSpaceV1 = circleCenterScreenSpace + V1; + outV.position = float4(screenSpaceV1, 0.0f, 1.0f); + } + else if (vertexIdx == 1u) + { + outV.position = float4(circleCenterScreenSpace, 0.0f, 1.0f); + } + else if (vertexIdx == 2u) + { + // find intersection point vertex + float2 intersectionPoint = v * antiAliasedLineThickness * 2.0f; + intersectionPoint += circleCenterScreenSpace; + outV.position = float4(intersectionPoint, 0.0f, 1.0f); + } + else if (vertexIdx == 3u) + { + const float2 V2 = normalize(mul(rotationMatrix, v)) * antiAliasedLineThickness * 2.0f; + const float2 screenSpaceV2 = circleCenterScreenSpace + V2; + outV.position = float4(screenSpaceV2, 0.0f, 1.0f); + } - if (vertexIdx == 0u) - { - const float2 V1 = normalize(mul(v, rotationMatrix)) * antiAliasedLineThickness * 2.0f; - const float2 screenSpaceV1 = circleCenterScreenSpace + V1; - outV.position = float4(screenSpaceV1, 0.0f, 1.0f); - } - else if (vertexIdx == 1u) - { - outV.position = float4(circleCenterScreenSpace, 0.0f, 1.0f); + outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; } - else if (vertexIdx == 2u) - { - // find intersection point vertex - float2 intersectionPoint = v * antiAliasedLineThickness * 2.0f; - intersectionPoint += circleCenterScreenSpace; - outV.position = float4(intersectionPoint, 0.0f, 1.0f); - } - else if (vertexIdx == 3u) + else { - const float2 V2 = normalize(mul(rotationMatrix, v)) * antiAliasedLineThickness * 2.0f; - const float2 screenSpaceV2 = circleCenterScreenSpace + V2; - 
outV.position = float4(screenSpaceV2, 0.0f, 1.0f); + outV.position = INVALID_VERTEX; } - - outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; - } - else - { - outV.position = INVALID_VERTEX; } } - } - else if (objType == ObjectType::CURVE_BOX) - { - CurveBox curveBox; - curveBox.aabbMin = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - curveBox.aabbMax = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - - for (uint32_t i = 0; i < 3; i ++) + else if (objType == ObjectType::CURVE_BOX) { - curveBox.curveMin[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); - curveBox.curveMax[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); - } + CurveBox curveBox; + curveBox.aabbMin = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + curveBox.aabbMax = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + + for (uint32_t i = 0; i < 3; i ++) + { + curveBox.curveMin[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); + curveBox.curveMax[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); + } - pfloat64_t2 aabbMaxXMinY; - aabbMaxXMinY.x = curveBox.aabbMax.x; - aabbMaxXMinY.y = curveBox.aabbMin.y; + pfloat64_t2 aabbMaxXMinY; + aabbMaxXMinY.x = curveBox.aabbMax.x; + aabbMaxXMinY.y = curveBox.aabbMin.y; - pfloat64_t2 aabbMinXMaxY; - aabbMinXMaxY.x = curveBox.aabbMin.x; - aabbMinXMaxY.y = curveBox.aabbMax.y; + pfloat64_t2 aabbMinXMaxY; + aabbMinXMaxY.x = curveBox.aabbMin.x; + aabbMinXMaxY.y = curveBox.aabbMax.y; - const 
float2 ndcAxisU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMaxXMinY - curveBox.aabbMin)); - const float2 ndcAxisV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMinXMaxY - curveBox.aabbMin)); + const float2 ndcAxisU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMaxXMinY - curveBox.aabbMin)); + const float2 ndcAxisV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMinXMaxY - curveBox.aabbMin)); - const float2 screenSpaceAabbExtents = float2(length(ndcAxisU * float2(globals.resolution)) / 2.0, length(ndcAxisV * float2(globals.resolution)) / 2.0); + const float2 screenSpaceAabbExtents = float2(length(ndcAxisU * float2(globals.resolution)) / 2.0, length(ndcAxisV * float2(globals.resolution)) / 2.0); - // we could use something like this to compute screen space change over minor/major change and avoid ddx(minor), ddy(major) in frag shader (the code below doesn't account for rotation) - outV.setCurveBoxScreenSpaceSize(float2(screenSpaceAabbExtents)); + // we could use something like this to compute screen space change over minor/major change and avoid ddx(minor), ddy(major) in frag shader (the code below doesn't account for rotation) + outV.setCurveBoxScreenSpaceSize(float2(screenSpaceAabbExtents)); - const float2 undilatedCorner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - const pfloat64_t2 undilatedCornerF64 = _static_cast(undilatedCorner); + const float2 undilatedCorner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + const pfloat64_t2 undilatedCornerF64 = _static_cast(undilatedCorner); - // We don't dilate on AMD (= no fragShaderInterlock) - const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; - const float2 dilateRate = pixelsToIncreaseOnEachSide / screenSpaceAabbExtents; // float sufficient to hold the dilate rect? 
- float2 dilateVec; - float2 dilatedUV; - dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); + // We don't dilate on AMD (= no fragShaderInterlock) + const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; + const float2 dilateRate = pixelsToIncreaseOnEachSide / screenSpaceAabbExtents; // float sufficient to hold the dilate rect? + float2 dilateVec; + float2 dilatedUV; + dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); - // doing interpolation this way to ensure correct endpoints and 0 and 1, we can alternatively use branches to set current corner based on vertexIdx - const pfloat64_t2 currentCorner = curveBox.aabbMin * (_static_cast(float2(1.0f, 1.0f)) - undilatedCornerF64) + - curveBox.aabbMax * undilatedCornerF64; + // doing interpolation this way to ensure correct endpoints and 0 and 1, we can alternatively use branches to set current corner based on vertexIdx + const pfloat64_t2 currentCorner = curveBox.aabbMin * (_static_cast(float2(1.0f, 1.0f)) - undilatedCornerF64) + + curveBox.aabbMax * undilatedCornerF64; - const float2 coord = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, currentCorner) + _static_cast(dilateVec)); + const float2 coord = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, currentCorner) + _static_cast(dilateVec)); - outV.position = float4(coord, 0.f, 1.f); + outV.position = float4(coord, 0.f, 1.f); - const uint major = (uint)SelectedMajorAxis; - const uint minor = 1-major; - - // A, B & C get converted from unorm to [0, 1] - // A & B get converted from [0,1] to [-2, 2] - shapes::Quadratic curveMin = shapes::Quadratic::construct( - curveBox.curveMin[0], curveBox.curveMin[1], curveBox.curveMin[2]); - shapes::Quadratic curveMax = shapes::Quadratic::construct( - curveBox.curveMax[0], curveBox.curveMax[1], curveBox.curveMax[2]); - - outV.setMinorBBoxUV(dilatedUV[minor]); - outV.setMajorBBoxUV(dilatedUV[major]); - - 
outV.setCurveMinMinor(math::equations::Quadratic::construct( - curveMin.A[minor], - curveMin.B[minor], - curveMin.C[minor])); - outV.setCurveMinMajor(math::equations::Quadratic::construct( - curveMin.A[major], - curveMin.B[major], - curveMin.C[major])); - - outV.setCurveMaxMinor(math::equations::Quadratic::construct( - curveMax.A[minor], - curveMax.B[minor], - curveMax.C[minor])); - outV.setCurveMaxMajor(math::equations::Quadratic::construct( - curveMax.A[major], - curveMax.B[major], - curveMax.C[major])); - - //math::equations::Quadratic curveMinRootFinding = math::equations::Quadratic::construct( - // curveMin.A[major], - // curveMin.B[major], - // curveMin.C[major] - maxCorner[major]); - //math::equations::Quadratic curveMaxRootFinding = math::equations::Quadratic::construct( - // curveMax.A[major], - // curveMax.B[major], - // curveMax.C[major] - maxCorner[major]); - //outV.setMinCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMinRootFinding)); - //outV.setMaxCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMaxRootFinding)); - } - else if (objType == ObjectType::FONT_GLYPH) - { - LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); - const float italicTiltSlope = lineStyle.screenSpaceLineWidth; // aliased text style member with line style + const uint major = (uint)SelectedMajorAxis; + const uint minor = 1-major; + + // A, B & C get converted from unorm to [0, 1] + // A & B get converted from [0,1] to [-2, 2] + shapes::Quadratic curveMin = shapes::Quadratic::construct( + curveBox.curveMin[0], curveBox.curveMin[1], curveBox.curveMin[2]); + shapes::Quadratic curveMax = shapes::Quadratic::construct( + curveBox.curveMax[0], curveBox.curveMax[1], curveBox.curveMax[2]); + + outV.setMinorBBoxUV(dilatedUV[minor]); + outV.setMajorBBoxUV(dilatedUV[major]); + + outV.setCurveMinMinor(math::equations::Quadratic::construct( + curveMin.A[minor], + curveMin.B[minor], + curveMin.C[minor])); + 
outV.setCurveMinMajor(math::equations::Quadratic::construct( + curveMin.A[major], + curveMin.B[major], + curveMin.C[major])); + + outV.setCurveMaxMinor(math::equations::Quadratic::construct( + curveMax.A[minor], + curveMax.B[minor], + curveMax.C[minor])); + outV.setCurveMaxMajor(math::equations::Quadratic::construct( + curveMax.A[major], + curveMax.B[major], + curveMax.C[major])); + + //math::equations::Quadratic curveMinRootFinding = math::equations::Quadratic::construct( + // curveMin.A[major], + // curveMin.B[major], + // curveMin.C[major] - maxCorner[major]); + //math::equations::Quadratic curveMaxRootFinding = math::equations::Quadratic::construct( + // curveMax.A[major], + // curveMax.B[major], + // curveMax.C[major] - maxCorner[major]); + //outV.setMinCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMinRootFinding)); + //outV.setMaxCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMaxRootFinding)); + } + else if (objType == ObjectType::FONT_GLYPH) + { + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); + const float italicTiltSlope = lineStyle.screenSpaceLineWidth; // aliased text style member with line style - GlyphInfo glyphInfo; - glyphInfo.topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - glyphInfo.dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - glyphInfo.aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); - - float32_t2 minUV = glyphInfo.getMinUV(); - uint16_t textureID = glyphInfo.getTextureID(); - - const float32_t2 dirV = float32_t2(glyphInfo.dirU.y, -glyphInfo.dirU.x) * glyphInfo.aspectRatio; - const float2 screenTopLeft = 
_static_cast(transformPointNdc(clipProjectionData.projectionToNDC, glyphInfo.topLeft)); - const float2 screenDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(glyphInfo.dirU))); - const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) - const float2 undilatedCornerNDC = corner * 2.0 - 1.0; // corners of square from (-1, -1) to (1, 1) + GlyphInfo glyphInfo; + glyphInfo.topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + glyphInfo.dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + glyphInfo.aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + float32_t2 minUV = glyphInfo.getMinUV(); + uint16_t textureID = glyphInfo.getTextureID(); + + const float32_t2 dirV = float32_t2(glyphInfo.dirU.y, -glyphInfo.dirU.x) * glyphInfo.aspectRatio; + const float2 screenTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, glyphInfo.topLeft)); + const float2 screenDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(glyphInfo.dirU))); + const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) + const float2 undilatedCornerNDC = corner * 2.0 - 1.0; // corners of square from (-1, -1) to (1, 1) - const float2 screenSpaceAabbExtents = float2(length(screenDirU * float2(globals.resolution)) / 
2.0, length(screenDirV * float2(globals.resolution)) / 2.0); - const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; - const float2 dilateRate = (pixelsToIncreaseOnEachSide / screenSpaceAabbExtents); + const float2 screenSpaceAabbExtents = float2(length(screenDirU * float2(globals.resolution)) / 2.0, length(screenDirV * float2(globals.resolution)) / 2.0); + const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; + const float2 dilateRate = (pixelsToIncreaseOnEachSide / screenSpaceAabbExtents); - const float2 vx = screenDirU * dilateRate.x; - const float2 vy = screenDirV * dilateRate.y; - const float2 offsetVec = vx * undilatedCornerNDC.x + vy * undilatedCornerNDC.y; - float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV + offsetVec; + const float2 vx = screenDirU * dilateRate.x; + const float2 vy = screenDirV * dilateRate.y; + const float2 offsetVec = vx * undilatedCornerNDC.x + vy * undilatedCornerNDC.y; + float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV + offsetVec; - if (corner.y == 0 && italicTiltSlope > 0.0f) - coord += normalize(screenDirU) * length(screenDirV) * italicTiltSlope * float(globals.resolution.y) / float(globals.resolution.x); + if (corner.y == 0 && italicTiltSlope > 0.0f) + coord += normalize(screenDirU) * length(screenDirV) * italicTiltSlope * float(globals.resolution.y) / float(globals.resolution.x); - // If aspect ratio of the dimensions and glyph inside the texture are the same then screenPxRangeX === screenPxRangeY - // but if the glyph box is stretched in any way then we won't get correct msdf - // in that case we need to take the max(screenPxRangeX, screenPxRangeY) to avoid blur due to underexaggerated distances - // We compute screenPxRange using the ratio of our screenspace extent to the texel space our glyph takes inside the texture - // Our glyph is centered inside the texture, so `maxUV = 1.0 - minUV` and `glyphTexelSize = (1.0-2.0*minUV) * 
MSDFSize - const float screenPxRangeX = screenSpaceAabbExtents.x / ((1.0 - 2.0 * minUV.x)); // division by MSDFSize happens after max - const float screenPxRangeY = screenSpaceAabbExtents.y / ((1.0 - 2.0 * minUV.y)); // division by MSDFSize happens after max - outV.setFontGlyphPxRange((max(max(screenPxRangeX, screenPxRangeY), 1.0) * MSDFPixelRangeHalf) / MSDFSize); // we premultuply by MSDFPixelRange/2.0, to avoid doing it in frag shader - - // In order to keep the shape scale constant with any dilation values: - // We compute the new dilated minUV that gets us minUV when interpolated on the previous undilated top left - const float2 topLeftInterpolationValue = (dilateRate/(1.0+2.0*dilateRate)); - const float2 dilatedMinUV = (topLeftInterpolationValue - minUV) / (2.0 * topLeftInterpolationValue - 1.0); - const float2 dilatedMaxUV = float2(1.0, 1.0) - dilatedMinUV; + // If aspect ratio of the dimensions and glyph inside the texture are the same then screenPxRangeX === screenPxRangeY + // but if the glyph box is stretched in any way then we won't get correct msdf + // in that case we need to take the max(screenPxRangeX, screenPxRangeY) to avoid blur due to underexaggerated distances + // We compute screenPxRange using the ratio of our screenspace extent to the texel space our glyph takes inside the texture + // Our glyph is centered inside the texture, so `maxUV = 1.0 - minUV` and `glyphTexelSize = (1.0-2.0*minUV) * MSDFSize + const float screenPxRangeX = screenSpaceAabbExtents.x / ((1.0 - 2.0 * minUV.x)); // division by MSDFSize happens after max + const float screenPxRangeY = screenSpaceAabbExtents.y / ((1.0 - 2.0 * minUV.y)); // division by MSDFSize happens after max + outV.setFontGlyphPxRange((max(max(screenPxRangeX, screenPxRangeY), 1.0) * MSDFPixelRangeHalf) / MSDFSize); // we premultuply by MSDFPixelRange/2.0, to avoid doing it in frag shader + + // In order to keep the shape scale constant with any dilation values: + // We compute the new dilated minUV that 
gets us minUV when interpolated on the previous undilated top left + const float2 topLeftInterpolationValue = (dilateRate/(1.0+2.0*dilateRate)); + const float2 dilatedMinUV = (topLeftInterpolationValue - minUV) / (2.0 * topLeftInterpolationValue - 1.0); + const float2 dilatedMaxUV = float2(1.0, 1.0) - dilatedMinUV; - const float2 uv = dilatedMinUV + corner * (dilatedMaxUV - dilatedMinUV); + const float2 uv = dilatedMinUV + corner * (dilatedMaxUV - dilatedMinUV); - outV.position = float4(coord, 0.f, 1.f); - outV.setFontGlyphUV(uv); - outV.setFontGlyphTextureId(textureID); - } - else if (objType == ObjectType::IMAGE) - { - pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); - - const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; - const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); - const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); - const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - float2 uv = corner; // non-dilated + outV.position = float4(coord, 0.f, 1.f); + outV.setFontGlyphUV(uv); + outV.setFontGlyphTextureId(textureID); + } + else if (objType == ObjectType::IMAGE) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + float32_t2 dirU = 
vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; + const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); + const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); + const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + float2 uv = corner; // non-dilated - float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; - outV.position = float4(ndcCorner, 0.f, 1.f); - outV.setImageUV(uv); - outV.setImageTextureId(textureID); - } - + outV.position = float4(ndcCorner, 0.f, 1.f); + outV.setImageUV(uv); + outV.setImageTextureId(textureID); + } -// Make the cage fullscreen for testing: + // Make the cage fullscreen for testing: #if 0 - // disabled for object of POLYLINE_CONNECTOR type, since miters would cover whole screen - if(objType != ObjectType::POLYLINE_CONNECTOR) - { - if (vertexIdx == 0u) - outV.position = float4(-1, -1, 0, 1); - else if (vertexIdx == 1u) - outV.position = float4(-1, +1, 0, 1); - else if (vertexIdx == 2u) - outV.position = float4(+1, -1, 0, 1); - else if (vertexIdx == 3u) - outV.position = float4(+1, +1, 0, 1); - } + // disabled for object of POLYLINE_CONNECTOR type, since miters would cover whole screen + if(objType != ObjectType::POLYLINE_CONNECTOR) + { + if (vertexIdx == 0u) + outV.position = float4(-1, 
-1, 0, 1); + else if (vertexIdx == 1u) + outV.position = float4(-1, +1, 0, 1); + else if (vertexIdx == 2u) + outV.position = float4(+1, -1, 0, 1); + else if (vertexIdx == 3u) + outV.position = float4(+1, +1, 0, 1); + } #endif - + } outV.clip = float4(outV.position.x - clipProjectionData.minClipNDC.x, outV.position.y - clipProjectionData.minClipNDC.y, clipProjectionData.maxClipNDC.x - outV.position.x, clipProjectionData.maxClipNDC.y - outV.position.y); return outV; -#endif } From 2528e75062433a9e78a329da2a37434c9d92525c Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 11 Apr 2025 14:04:58 +0200 Subject: [PATCH 040/129] Implemented anti aliasing --- 62_CAD/main.cpp | 38 ++++-- .../main_pipeline/fragment_shader.hlsl | 126 +++++++++++------- 2 files changed, 103 insertions(+), 61 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index e425dce54..77f90d13d 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3170,8 +3170,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else if (mode == ExampleMode::CASE_9) { - // GRID - + // GRID (outdated) /*core::vector vertices = { { float32_t2(-200.0f, -200.0f), 10.0f }, { float32_t2(-50.0f, -200.0f), 50.0f }, @@ -3203,20 +3202,32 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu };*/ // PYRAMID - core::vector vertices = { + //{ float64_t2(0.0, 0.0), 100.0 }, //0 + //{ float64_t2(-200.0, -200.0), 10.0 }, //1 + //{ float64_t2(200.0, -200.0), 10.0 }, //2 + //{ float64_t2(200.0, 200.0), -20.0 }, //3 + //{ float64_t2(-200.0, 200.0), 10.0 }, //4 + { float64_t2(0.0, 0.0), 100.0 }, { float64_t2(-200.0, -200.0), 10.0 }, { float64_t2(200.0, -200.0), 10.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(200.0, -200.0), 10.0 }, { float64_t2(200.0, 200.0), -20.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(200.0, 200.0), -20.0 }, + { float64_t2(-200.0, 200.0), 10.0 }, + { float64_t2(0.0, 0.0), 100.0 }, { float64_t2(-200.0, 200.0), 10.0 }, + { 
float64_t2(-200.0, -200.0), 10.0 }, }; core::vector indices = { 0, 1, 2, - 0, 2, 3, - 0, 3, 4, - 0, 4, 1 + 3, 4, 5, + 6, 7, 8, + 9, 10, 11 }; // SINGLE TRIANGLE @@ -3257,17 +3268,16 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS // 2 - DISCRETE_FIXED_LENGTH_INTERVALS // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; switch (m_shadingModeExample) { case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: { dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; - dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, animatedAlpha)); dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - //dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; @@ -3277,16 +3287,16 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmSettingsInfo.intervalWidth = 8.0f; dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); 
dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(30.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(30.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } } diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index ab6388bc8..ffa94f15c 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -420,6 +420,26 @@ float dot2(in float2 vec) return dot(vec, vec); } +struct DTMHeightShadingAAInfo +{ + float currentHeight; + float4 currentSegmentColor; + float nearestSegmentHeight; + float4 nearestSegmentColor; +}; + +void calculateBetweenHeightShadingRegionsAntiAliasing(in DTMSettings dtm, in DTMHeightShadingAAInfo aaInfo, out float3 textureColor, out float localAlpha) +{ + float heightDeriv = fwidth(aaInfo.currentHeight); + + float pxDistanceToNearestSegment = abs(aaInfo.currentHeight - aaInfo.nearestSegmentHeight) / heightDeriv; + float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); + float4 localHeightColor = lerp(aaInfo.nearestSegmentColor, aaInfo.currentSegmentColor, nearestSegmentColorCoverage); + + localAlpha *= localHeightColor.a; + textureColor = localHeightColor.rgb * localAlpha + textureColor * (1.0f - localAlpha); +} + 
[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -438,7 +458,7 @@ float4 fragMain(PSInput input) : SV_TARGET const float outlineThickness = input.getOutlineThickness(); const float contourThickness = input.getContourLineThickness(); const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float stretch = 1.0f; // TODO: figure out what is it for ---> [ERFAN's REPLY: no need to give shit about this in dtms, it's for special shape styles] + const float stretch = 1.0f; const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); @@ -507,58 +527,73 @@ float4 fragMain(PSInput input) : SV_TARGET if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; - uint32_t mapIndexPlus1 = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t mapIndex = mapIndexPlus1 == 0 ? 
mapIndexPlus1 : mapIndexPlus1 - 1; + int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + int mapIndex = max(upperBoundIndex - 1, 0); + int mapIndexPrev = max(mapIndex - 1, 0); + int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); - float heightDeriv = fwidth(height); - bool blendWithPrev = true - && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[mapIndexPlus1] + dtm.heightColorMapHeights[mapIndex])); - // logic explainer: if colorIdx is 0.0 then it means blend with next // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values - if (blendWithPrev) + bool blendWithPrev = (mapIndex > 0) + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[upperBoundIndex] + dtm.heightColorMapHeights[mapIndex])); + + DTMHeightShadingAAInfo aaInfo; + aaInfo.currentHeight = height; + aaInfo.currentSegmentColor = dtm.heightColorMapColors[mapIndex]; + aaInfo.nearestSegmentHeight = blendWithPrev ? dtm.heightColorMapHeights[mapIndex] : dtm.heightColorMapHeights[mapIndexNext]; + aaInfo.nearestSegmentColor = blendWithPrev ? 
dtm.heightColorMapColors[mapIndexPrev] : dtm.heightColorMapColors[mapIndexNext]; + + calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, textureColor, localAlpha); + } + else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + { + float interval = dtm.intervalWidth; + float heightMinShadingHeightDiff = (height - minShadingHeight); + int sectionIndex = int(heightMinShadingHeightDiff / interval); + float heightTmp = minShadingHeight + float(sectionIndex) * interval; + + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); + + float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; + + float interpolationVal; + bool blendWithPrev; + if (upperBoundHeightIndex == 0) { - if (mapIndex > 0) - { - float pxDistanceToPrevHeight = (height - dtm.heightColorMapHeights[mapIndex]) / heightDeriv; - float prevColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToPrevHeight); - textureColor = lerp(dtm.heightColorMapColors[mapIndex - 1].rgb, dtm.heightColorMapColors[mapIndex].rgb, prevColorCoverage); - } - else - { - textureColor = dtm.heightColorMapColors[mapIndex].rgb; - } + interpolationVal = 1.0f; + blendWithPrev = false; } else { - if (mapIndex < heightMapSize - 1) - { - float pxDistanceToNextHeight = (height - dtm.heightColorMapHeights[mapIndexPlus1]) / heightDeriv; - float nextColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNextHeight); - textureColor = lerp(dtm.heightColorMapColors[mapIndex].rgb, 
dtm.heightColorMapColors[mapIndexPlus1].rgb, nextColorCoverage); - } - else - { - textureColor = dtm.heightColorMapColors[mapIndex].rgb; - } + interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + blendWithPrev = height - interval * sectionIndex < 0.5f; } - //localAlpha = dtm.heightColorMapColors[mapIndex].a; - } - else - { - float heightTmp; - if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + DTMHeightShadingAAInfo aaInfo; + aaInfo.currentHeight = height; + aaInfo.currentSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + if (blendWithPrev) { - float interval = dtm.intervalWidth; - int sectionIndex = int((height - minShadingHeight) / interval); - heightTmp = minShadingHeight + float(sectionIndex) * interval; + aaInfo.nearestSegmentHeight = heightTmp; + aaInfo.nearestSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal - 1.0f / interval); } - else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + else { - heightTmp = height; + aaInfo.nearestSegmentHeight = heightTmp + interval; + aaInfo.nearestSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal + 1.0f / interval); } + calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, textureColor, localAlpha); + } + else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + { + float heightTmp = height; DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); @@ -575,16 +610,13 @@ float4 fragMain(PSInput input) : SV_TARGET interpolationVal = 1.0f; else interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - - textureColor = lerp(lowerBoundColor.rgb, upperBoundColor.rgb, interpolationVal); - localAlpha = lerp(lowerBoundColor.a, upperBoundColor.a, interpolationVal);; + + float4 localHeightColor = lerp(lowerBoundColor, 
upperBoundColor, interpolationVal); + + localAlpha *= localHeightColor.a; + textureColor = localHeightColor.rgb * localAlpha + textureColor * (1.0f - localAlpha); } } - //else // TODO: remove!! - //{ - // printf("WTF"); - // return float4(0.0f, 0.0f, 0.0f, 1.0f); - //} // CONTOUR From fd4e576665b228ede3acd28fc32119cc42d8cf5e Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 12 Apr 2025 12:42:23 +0200 Subject: [PATCH 041/129] Triangle dilation --- 62_CAD/main.cpp | 4 +- .../main_pipeline/fragment_shader.hlsl | 16 ++++++-- .../shaders/main_pipeline/vertex_shader.hlsl | 38 ++++++++++++++++--- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 77f90d13d..3ad285c46 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3211,9 +3211,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { float64_t2(0.0, 0.0), 100.0 }, { float64_t2(-200.0, -200.0), 10.0 }, - { float64_t2(200.0, -200.0), 10.0 }, + { float64_t2(200.0, -100.0), 10.0 }, { float64_t2(0.0, 0.0), 100.0 }, - { float64_t2(200.0, -200.0), 10.0 }, + { float64_t2(200.0, -100.0), 10.0 }, { float64_t2(200.0, 200.0), -20.0 }, { float64_t2(0.0, 0.0), 100.0 }, { float64_t2(200.0, 200.0), -20.0 }, diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index ffa94f15c..d19503ca8 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -440,6 +440,15 @@ void calculateBetweenHeightShadingRegionsAntiAliasing(in DTMSettings dtm, in DTM textureColor = localHeightColor.rgb * localAlpha + textureColor * (1.0f - localAlpha); } +float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) +{ + float denom = (v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y); + float u = ((v2.y - v3.y) * (p.x - v3.x) + (v3.x - v2.x) * (p.y - v3.y)) / denom; + float v = ((v3.y - v1.y) * (p.x - v3.x) + 
(v1.x - v3.x) * (p.y - v3.y)) / denom; + float w = 1.0 - u - v; + return float3(u, v, w); +} + [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -470,7 +479,8 @@ float4 fragMain(PSInput input) : SV_TARGET v[1] = input.getScreenSpaceVertexAttribs(1); v[2] = input.getScreenSpaceVertexAttribs(2); - const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; + //const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; + const float3 baryCoord = calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); // indices of points constructing every edge uint2 edgePoints[3]; @@ -483,8 +493,8 @@ float4 fragMain(PSInput input) : SV_TARGET opposingVertexIdx[0] = 2; opposingVertexIdx[1] = 0; opposingVertexIdx[2] = 1; - - float height = input.getHeight(); + + float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; // HEIGHT SHADING const uint32_t heightMapSize = dtm.heightColorEntryCount; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index b300a6958..cef5fb4c2 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -102,20 +102,46 @@ PSInput main(uint vertexID : SV_VertexID) outV.setMainObjectIdx(pc.triangleMeshMainObjectIndex); TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); - pfloat64_t2 vtxPos; - vtxPos.x = _static_cast(vtx.pos.x); - vtxPos.y = _static_cast(vtx.pos.y); MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); clipProjectionData = getClipProjectionData(mainObj); - float2 transformedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos); + // assuming there are 3 * N vertices, number of vertices is equal to number of indices and indices are sequential starting from 0 + float2 
transformedOriginalPos; + float2 transformedDilatedPos; + { + uint32_t firstVertexOfCurrentTriangleIndex = vertexID - vertexID % 3; + uint32_t currentVertexWithinTriangleIndex = vertexID - firstVertexOfCurrentTriangleIndex; + + TriangleMeshVertex triangleVertices[3]; + triangleVertices[0] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * firstVertexOfCurrentTriangleIndex, 8u); + triangleVertices[1] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 1), 8u); + triangleVertices[2] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 2), 8u); + transformedOriginalPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, triangleVertices[currentVertexWithinTriangleIndex].pos); + + pfloat64_t2 triangleCentroid; + triangleCentroid.x = (triangleVertices[0].pos.x + triangleVertices[1].pos.x + triangleVertices[2].pos.x) / _static_cast(3.0f); + triangleCentroid.y = (triangleVertices[0].pos.y + triangleVertices[1].pos.y + triangleVertices[2].pos.y) / _static_cast(3.0f); + + // move triangles to local space, with centroid at (0, 0) + triangleVertices[0].pos = triangleVertices[0].pos - triangleCentroid; + triangleVertices[1].pos = triangleVertices[1].pos - triangleCentroid; + triangleVertices[2].pos = triangleVertices[2].pos - triangleCentroid; + + // TODO: calculate dialation factor + pfloat64_t dialationFactor = _static_cast(2.0f); + pfloat64_t2 dialatedVertex = triangleVertices[currentVertexWithinTriangleIndex].pos * dialationFactor; + + dialatedVertex = dialatedVertex + triangleCentroid; + + transformedDilatedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, dialatedVertex); + } - outV.position.xy = transformedPos; + outV.position.xy = transformedDilatedPos; outV.position = transformFromSreenSpaceToNdc(outV.position.xy, 
globals.resolution); const float heightAsFloat = nbl::hlsl::_static_cast(vtx.height); outV.setHeight(heightAsFloat); - outV.setScreenSpaceVertexAttribs(float3(transformedPos, heightAsFloat)); + outV.setScreenSpaceVertexAttribs(float3(transformedOriginalPos, heightAsFloat)); outV.setCurrentWorldToScreenRatio( _static_cast((_static_cast(2.0f) / (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) From 6c907e49bce8d7bde928af6b521fa2d7cc280584 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 14 Apr 2025 12:54:50 +0200 Subject: [PATCH 042/129] Added few todos --- .../main_pipeline/fragment_shader.hlsl | 20 +++++++++---------- .../shaders/main_pipeline/vertex_shader.hlsl | 4 ++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index d19503ca8..bfb267a01 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -430,6 +430,7 @@ struct DTMHeightShadingAAInfo void calculateBetweenHeightShadingRegionsAntiAliasing(in DTMSettings dtm, in DTMHeightShadingAAInfo aaInfo, out float3 textureColor, out float localAlpha) { + //TODO: move outside float heightDeriv = fwidth(aaInfo.currentHeight); float pxDistanceToNearestSegment = abs(aaInfo.currentHeight - aaInfo.nearestSegmentHeight) / heightDeriv; @@ -437,7 +438,7 @@ void calculateBetweenHeightShadingRegionsAntiAliasing(in DTMSettings dtm, in DTM float4 localHeightColor = lerp(aaInfo.nearestSegmentColor, aaInfo.currentSegmentColor, nearestSegmentColorCoverage); localAlpha *= localHeightColor.a; - textureColor = localHeightColor.rgb * localAlpha + textureColor * (1.0f - localAlpha); + textureColor = localHeightColor.rgb; } float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) @@ -529,6 +530,7 @@ float4 fragMain(PSInput input) : SV_TARGET convexPolygonSdf = max(convexPolygonSdf, 
line3Sdf); convexPolygonSdf = max(convexPolygonSdf, line4Sdf); + // TODO: separate localAlpha = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); // calculate height color @@ -577,13 +579,13 @@ float4 fragMain(PSInput input) : SV_TARGET bool blendWithPrev; if (upperBoundHeightIndex == 0) { - interpolationVal = 1.0f; + interpolationVal = 1.0f; // TODO: investigate if it is correct blendWithPrev = false; } else { interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - blendWithPrev = height - interval * sectionIndex < 0.5f; + blendWithPrev = height - interval * sectionIndex < 0.5f; // TODO: investigate if it is correct } DTMHeightShadingAAInfo aaInfo; @@ -603,8 +605,6 @@ float4 fragMain(PSInput input) : SV_TARGET } else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) { - float heightTmp = height; - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; @@ -619,7 +619,7 @@ float4 fragMain(PSInput input) : SV_TARGET if (upperBoundHeightIndex == 0) interpolationVal = 1.0f; else - interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); float4 localHeightColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); @@ -648,7 +648,7 @@ float4 fragMain(PSInput input) : SV_TARGET // TODO: case where heights we are looking for are on all three vertices for (int i = 0; i < 3; ++i) { - if (contourLinePointsIdx == 3) + if (contourLinePointsIdx == 2) break; const uint2 currentEdgePoints = edgePoints[i]; @@ -694,8 +694,6 @@ float4 fragMain(PSInput input) : SV_TARGET localAlpha = max(localAlpha, contourLocalAlpha); } - - // OUTLINE // find sdf of every edge @@ -719,8 +717,8 @@ float4 fragMain(PSInput input) : SV_TARGET float3 B = v[currentEdgePoints[1]]; float3 AB = B - A; float ABLen = length(AB); - - distances[i] = (triangleAreaTimesTwo / ABLen) * baryCoord[opposingVertexIdx[i]]; + float triangleHeightToOpositeVertex = triangleAreaTimesTwo / ABLen; + distances[i] = triangleHeightToOpositeVertex * baryCoord[opposingVertexIdx[i]]; } float minDistance = nbl::hlsl::numeric_limits::max; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index cef5fb4c2..20c29f16a 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -137,8 +137,7 @@ PSInput main(uint vertexID : SV_VertexID) transformedDilatedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, dialatedVertex); } - outV.position.xy = transformedDilatedPos; - outV.position = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution); + outV.position = transformFromSreenSpaceToNdc(transformedDilatedPos, globals.resolution); const float 
heightAsFloat = nbl::hlsl::_static_cast(vtx.height); outV.setHeight(heightAsFloat); outV.setScreenSpaceVertexAttribs(float3(transformedOriginalPos, heightAsFloat)); @@ -151,6 +150,7 @@ PSInput main(uint vertexID : SV_VertexID) DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); + // TODO: maybe move to fragment shader since we may have multiple contour styles later const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); From f173c71866259189779de0edc8bb209717bbf7b2 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 14 Apr 2025 16:00:51 +0200 Subject: [PATCH 043/129] Refactor --- 62_CAD/CTriangleMesh.h | 4 + 62_CAD/DrawResourcesFiller.cpp | 4 + 62_CAD/main.cpp | 4 + 62_CAD/shaders/globals.hlsl | 6 +- .../main_pipeline/fragment_shader.hlsl | 620 ++++++++++-------- 5 files changed, 357 insertions(+), 281 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 6c68cec27..c1dcbca68 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -25,6 +25,10 @@ struct DTMSettingsInfo float intervalWidth; E_HEIGHT_SHADING_MODE heightShadingMode; + bool drawHeightsFlag; + bool drawContoursFlag; + bool drawOutlineFlag; + void addHeightColorMapEntry(float height, float32_t4 color) { heightColorSet.emplace(height, color); diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index d28843a31..c11b0a67f 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -653,6 +653,10 @@ uint32_t 
DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm } _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); + dtmSettings.drawHeightsFlag = static_cast(dtmSettingsInfo.drawHeightsFlag); + dtmSettings.drawContoursFlag = static_cast(dtmSettingsInfo.drawContoursFlag); + dtmSettings.drawOutlineFlag = static_cast(dtmSettingsInfo.drawOutlineFlag); + for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) { const DTMSettings& itr = resourcesCollection.dtmSettings.vector[i]; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 3ad285c46..48ca4f5a3 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3264,6 +3264,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); + dtmSettingsInfo.drawHeightsFlag = true; + dtmSettingsInfo.drawContoursFlag = true; + dtmSettingsInfo.drawOutlineFlag = true; + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS // 2 - DISCRETE_FIXED_LENGTH_INTERVALS diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 24a833334..bf4c06db7 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -359,7 +359,11 @@ struct DTMSettings const static uint32_t HeightColorMapMaxEntries = 16u; uint32_t outlineLineStyleIdx; // index into line styles uint32_t contourLineStyleIdx; // index into line styles - + + int drawHeightsFlag; + int drawContoursFlag; + int drawOutlineFlag; + // contour lines float contourLinesStartHeight; float contourLinesEndHeight; diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index bfb267a01..be5e0bf07 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -428,17 +428,14 @@ 
struct DTMHeightShadingAAInfo float4 nearestSegmentColor; }; -void calculateBetweenHeightShadingRegionsAntiAliasing(in DTMSettings dtm, in DTMHeightShadingAAInfo aaInfo, out float3 textureColor, out float localAlpha) +void calculateBetweenHeightShadingRegionsAntiAliasing(in DTMSettings dtm, in DTMHeightShadingAAInfo aaInfo, in float heightDeriv, out float4 outputColor) { - //TODO: move outside - float heightDeriv = fwidth(aaInfo.currentHeight); - float pxDistanceToNearestSegment = abs(aaInfo.currentHeight - aaInfo.nearestSegmentHeight) / heightDeriv; float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); float4 localHeightColor = lerp(aaInfo.nearestSegmentColor, aaInfo.currentSegmentColor, nearestSegmentColorCoverage); - localAlpha *= localHeightColor.a; - textureColor = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; + outputColor.rgb = localHeightColor.rgb; } float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) @@ -450,322 +447,385 @@ float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3 return float3(u, v, w); } -[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] -[shader("pixel")] -float4 fragMain(PSInput input) : SV_TARGET +float4 calculateDTMHeightColor(in DTMSettings dtm, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) { - float localAlpha = 0.0f; - float3 textureColor = float3(0, 0, 0); // color sampled from a texture + float4 outputColor = float4(0.0f, 0.0f, 0.0f, 1.0f); - ObjectType objType = input.getObjType(); - const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); - const MainObject mainObj = loadMainObject(currentMainObjectIdx); - - if (pc.isDTMRendering) - { - // TRIANGLE RENDERING + // HEIGHT SHADING + const uint32_t heightMapSize = dtm.heightColorEntryCount; + float minShadingHeight = dtm.heightColorMapHeights[0]; + float maxShadingHeight 
= dtm.heightColorMapHeights[heightMapSize - 1]; + + if (heightMapSize > 0) + { + // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez + float2 e0 = v[1] - v[0]; + float2 e1 = v[2] - v[1]; + float2 e2 = v[0] - v[2]; + + float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); + float2 v0 = fragPos - v[0]; + float2 v1 = fragPos - v[1]; + float2 v2 = fragPos - v[2]; + + float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0))); + float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); + float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, e2) / dot(e2, e2))); + + float line0Sdf = distanceToLine0 * triangleAreaSign * (v0.x * e0.y - v0.y * e0.x); + float line1Sdf = distanceToLine1 * triangleAreaSign * (v1.x * e1.y - v1.y * e1.x); + float line2Sdf = distanceToLine2 * triangleAreaSign * (v2.x * e2.y - v2.y * e2.x); + float line3Sdf = (minShadingHeight - height) / heightDeriv; + float line4Sdf = (height - maxShadingHeight) / heightDeriv; + + float convexPolygonSdf = max(line0Sdf, line1Sdf); + convexPolygonSdf = max(convexPolygonSdf, line2Sdf); + convexPolygonSdf = max(convexPolygonSdf, line3Sdf); + convexPolygonSdf = max(convexPolygonSdf, line4Sdf); + + // TODO: separate + outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); + + // calculate height color + DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode(); + + if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { - const float outlineThickness = input.getOutlineThickness(); - const float contourThickness = input.getContourLineThickness(); - const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float stretch = 1.0f; - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + int mapIndex = 
max(upperBoundIndex - 1, 0); + int mapIndexPrev = max(mapIndex - 1, 0); + int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); + + // logic explainer: if colorIdx is 0.0 then it means blend with next + // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true + // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values + bool blendWithPrev = (mapIndex > 0) + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[upperBoundIndex] + dtm.heightColorMapHeights[mapIndex])); + + DTMHeightShadingAAInfo aaInfo; + aaInfo.currentHeight = height; + aaInfo.currentSegmentColor = dtm.heightColorMapColors[mapIndex]; + aaInfo.nearestSegmentHeight = blendWithPrev ? dtm.heightColorMapHeights[mapIndex] : dtm.heightColorMapHeights[mapIndexNext]; + aaInfo.nearestSegmentColor = blendWithPrev ? dtm.heightColorMapColors[mapIndexPrev] : dtm.heightColorMapColors[mapIndexNext]; + + calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, heightDeriv, outputColor); + } + else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + { + float interval = dtm.intervalWidth; + float heightMinShadingHeightDiff = (height - minShadingHeight); + int sectionIndex = int(heightMinShadingHeightDiff / interval); + float heightTmp = minShadingHeight + float(sectionIndex) * interval; - DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); - LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); - LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); - float3 v[3]; - v[0] = input.getScreenSpaceVertexAttribs(0); - v[1] = input.getScreenSpaceVertexAttribs(1); - v[2] = 
input.getScreenSpaceVertexAttribs(2); + float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; - //const float3 baryCoord = nbl::hlsl::spirv::BaryCoordKHR; - const float3 baryCoord = calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); - - // indices of points constructing every edge - uint2 edgePoints[3]; - edgePoints[0] = uint2(0, 1); - edgePoints[1] = uint2(1, 2); - edgePoints[2] = uint2(2, 0); + float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; - // index of vertex opposing an edge, needed for calculation of triangle heights - uint opposingVertexIdx[3]; - opposingVertexIdx[0] = 2; - opposingVertexIdx[1] = 0; - opposingVertexIdx[2] = 1; + float interpolationVal; + bool blendWithPrev; + if (upperBoundHeightIndex == 0) + { + interpolationVal = 1.0f; // TODO: investigate if it is correct + blendWithPrev = false; + } + else + { + interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + blendWithPrev = height - interval * sectionIndex < 0.5f; // TODO: investigate if it is correct + } - float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; + DTMHeightShadingAAInfo aaInfo; + aaInfo.currentHeight = height; + aaInfo.currentSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + if (blendWithPrev) + { + aaInfo.nearestSegmentHeight = heightTmp; + aaInfo.nearestSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal - 1.0f / interval); + } + else + { + aaInfo.nearestSegmentHeight = heightTmp + interval; + aaInfo.nearestSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal + 1.0f / interval); + } + calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, heightDeriv, outputColor); + } + else if (mode == 
DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1; - // HEIGHT SHADING - const uint32_t heightMapSize = dtm.heightColorEntryCount; - float minShadingHeight = dtm.heightColorMapHeights[0]; - float maxShadingHeight = dtm.heightColorMapHeights[heightMapSize - 1]; + float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; - if (heightMapSize > 0) - { - // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez - float2 e0 = v[1] - v[0]; - float2 e1 = v[2] - v[1]; - float2 e2 = v[0] - v[2]; - - float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); - float2 v0 = input.position.xy - v[0]; - float2 v1 = input.position.xy - v[1]; - float2 v2 = input.position.xy - v[2]; - - float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0))); - float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); - float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, e2) / dot(e2, e2))); - - float line0Sdf = distanceToLine0 * triangleAreaSign * (v0.x * e0.y - v0.y * e0.x); - float line1Sdf = distanceToLine1 * triangleAreaSign * (v1.x * e1.y - v1.y * e1.x); - float line2Sdf = distanceToLine2 * triangleAreaSign * (v2.x * e2.y - v2.y * e2.x); - float heightDeriv = fwidth(height); - float line3Sdf = (minShadingHeight - height) / heightDeriv; - float line4Sdf = (height - maxShadingHeight) / heightDeriv; - - float convexPolygonSdf = max(line0Sdf, line1Sdf); - convexPolygonSdf = max(convexPolygonSdf, line2Sdf); - convexPolygonSdf = max(convexPolygonSdf, line3Sdf); - convexPolygonSdf = max(convexPolygonSdf, line4Sdf); - - // TODO: separate - localAlpha = 1.0f - smoothstep(0.0f, 
globals.antiAliasingFactor * 2.0f, convexPolygonSdf); - - // calculate height color - DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode(); - - if(mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) - { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; - int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - int mapIndex = max(upperBoundIndex - 1, 0); - int mapIndexPrev = max(mapIndex - 1, 0); - int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); - - // logic explainer: if colorIdx is 0.0 then it means blend with next - // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true - // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values - bool blendWithPrev = (mapIndex > 0) - && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[upperBoundIndex] + dtm.heightColorMapHeights[mapIndex])); - - DTMHeightShadingAAInfo aaInfo; - aaInfo.currentHeight = height; - aaInfo.currentSegmentColor = dtm.heightColorMapColors[mapIndex]; - aaInfo.nearestSegmentHeight = blendWithPrev ? dtm.heightColorMapHeights[mapIndex] : dtm.heightColorMapHeights[mapIndexNext]; - aaInfo.nearestSegmentColor = blendWithPrev ? 
dtm.heightColorMapColors[mapIndexPrev] : dtm.heightColorMapColors[mapIndexNext]; - - calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, textureColor, localAlpha); - } - else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) - { - float interval = dtm.intervalWidth; - float heightMinShadingHeightDiff = (height - minShadingHeight); - int sectionIndex = int(heightMinShadingHeightDiff / interval); - float heightTmp = minShadingHeight + float(sectionIndex) * interval; + float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; - uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); + float interpolationVal; + if (upperBoundHeightIndex == 0) + interpolationVal = 1.0f; + else + interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; + float4 localHeightColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); - float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; + outputColor.a *= localHeightColor.a; + outputColor.rgb = localHeightColor.rgb * outputColor.a + outputColor.rgb * (1.0f - outputColor.a); + } + } - float interpolationVal; - bool blendWithPrev; - if (upperBoundHeightIndex == 0) - { - interpolationVal = 1.0f; // TODO: investigate if it is correct - blendWithPrev = false; - } - else - { - interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - blendWithPrev = height - interval * sectionIndex < 0.5f; // TODO: investigate if it is 
correct - } + return outputColor; +} - DTMHeightShadingAAInfo aaInfo; - aaInfo.currentHeight = height; - aaInfo.currentSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); - if (blendWithPrev) - { - aaInfo.nearestSegmentHeight = heightTmp; - aaInfo.nearestSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal - 1.0f / interval); - } - else - { - aaInfo.nearestSegmentHeight = heightTmp + interval; - aaInfo.nearestSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal + 1.0f / interval); - } - calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, textureColor, localAlpha); - } - else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) - { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; - uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1; +float4 calculateDTMContourColor(in DTMSettings dtm, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float height) +{ + float4 outputColor; + + LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); + const float contourThickness = psInput.getContourLineThickness(); + float stretch = 1.0f; + float phaseShift = 0.0f; + const float worldToScreenRatio = psInput.getCurrentWorldToScreenRatio(); + + // TODO: move to ubo or push constants + const float startHeight = dtm.contourLinesStartHeight; + const float endHeight = dtm.contourLinesEndHeight; + const float interval = dtm.contourLinesHeightInterval; + + // TODO: can be precomputed + const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; + + // TODO: it actually can output a negative number, fix + int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight + (interval * 0.5f)) / interval); + contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); + float contourLineHeight = 
startHeight + interval * contourLineIdx; + + int contourLinePointsIdx = 0; + float2 contourLinePoints[2]; + // TODO: case where heights we are looking for are on all three vertices + for (int i = 0; i < 3; ++i) + { + if (contourLinePointsIdx == 2) + break; - float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; - - float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; - - float interpolationVal; - if (upperBoundHeightIndex == 0) - interpolationVal = 1.0f; - else - interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + const uint2 currentEdgePoints = edgePoints[i]; + float3 p0 = v[currentEdgePoints[0]]; + float3 p1 = v[currentEdgePoints[1]]; - float4 localHeightColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + if (p1.z < p0.z) + nbl::hlsl::swap(p0, p1); - localAlpha *= localHeightColor.a; - textureColor = localHeightColor.rgb * localAlpha + textureColor * (1.0f - localAlpha); - } - } + float minHeight = p0.z; + float maxHeight = p1.z; - // CONTOUR + if (height >= minHeight && height <= maxHeight) + { + float2 edge = float2(p1.x, p1.y) - float2(p0.x, p0.y); + float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight); - // TODO: move to ubo or push constants - const float startHeight = dtm.contourLinesStartHeight; - const float endHeight = dtm.contourLinesEndHeight; - const float interval = dtm.contourLinesHeightInterval; + contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y); + ++contourLinePointsIdx; + } + } - // TODO: can be precomputed - const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; + { + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); - // TODO: it actually can output a negative number, fix 
- int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight + (interval * 0.5f)) / interval); - contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); - float contourLineHeight = startHeight + interval * contourLineIdx; + float distance = nbl::hlsl::numeric_limits::max; + if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, psInput.position.xy, contourThickness, contourStyle.isRoadStyleFlag); + } + else + { + // TODO: + // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! + // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, psInput.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper); + } + + outputColor.a = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a; + outputColor.rgb = contourStyle.color.rgb; + } - int contourLinePointsIdx = 0; - float2 contourLinePoints[2]; - // TODO: case where heights we are looking for are on all three vertices - for (int i = 0; i < 3; ++i) - { - if (contourLinePointsIdx == 2) - break; + return outputColor; +} - const uint2 currentEdgePoints = edgePoints[i]; - float3 p0 = v[currentEdgePoints[0]]; - float3 p1 = v[currentEdgePoints[1]]; +float4 calculateDTMOutlineColor(in DTMSettings dtm, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float3 baryCoord, in float height) +{ + float4 outputColor; + + LineStyle outlineStyle = 
loadLineStyle(dtm.outlineLineStyleIdx); + const float outlineThickness = psInput.getOutlineThickness(); + const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); + const float worldToScreenRatio = psInput.getCurrentWorldToScreenRatio(); + const float stretch = 1.0f; + + // index of vertex opposing an edge, needed for calculation of triangle heights + uint opposingVertexIdx[3]; + opposingVertexIdx[0] = 2; + opposingVertexIdx[1] = 0; + opposingVertexIdx[2] = 1; + + // find sdf of every edge + float triangleAreaTimesTwo; + { + float3 AB = v[0] - v[1]; + float3 AC = v[0] - v[2]; + AB.z = 0.0f; + AC.z = 0.0f; - if (p1.z < p0.z) - nbl::hlsl::swap(p0, p1); + // TODO: figure out if there is a faster solution + triangleAreaTimesTwo = length(cross(AB, AC)); + } - float minHeight = p0.z; - float maxHeight = p1.z; + // calculate sdf of every edge as it wasn't stippled + float distances[3]; + for (int i = 0; i < 3; ++i) + { + const uint2 currentEdgePoints = edgePoints[i]; + float3 A = v[currentEdgePoints[0]]; + float3 B = v[currentEdgePoints[1]]; + float3 AB = B - A; + float ABLen = length(AB); + float triangleHeightToOpositeVertex = triangleAreaTimesTwo / ABLen; + + distances[i] = triangleHeightToOpositeVertex * baryCoord[opposingVertexIdx[i]]; + } - if (height >= minHeight && height <= maxHeight) - { - float2 edge = float2(p1.x, p1.y) - float2(p0.x, p0.y); - float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight); + float minDistance = nbl::hlsl::numeric_limits::max; + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + for (uint i = 0; i < 3; ++i) + distances[i] -= outlineThickness; - contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y); - ++contourLinePointsIdx; - } - } + minDistance = min(distances[0], min(distances[1], distances[2])); + } + else + { + for (int i = 0; i < 3; ++i) + { + if (distances[i] > outlineThickness) + continue; - { - nbl::hlsl::shapes::Line lineSegment = 
nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); + const uint2 currentEdgePoints = edgePoints[i]; + float3 p0 = v[currentEdgePoints[0]]; + float3 p1 = v[currentEdgePoints[1]]; - float distance = nbl::hlsl::numeric_limits::max; - if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag); - } - else - { - // TODO: - // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! - // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper); - } + // long story short, in order for stipple patterns to be consistent: + // - point with lesser x coord should be starting point + // - if x coord of both points are equal then point with lesser y value should be starting point + if (p1.x < p0.x) + nbl::hlsl::swap(p0, p1); + else if (p1.x == p0.x && p1.y < p0.y) + nbl::hlsl::swap(p0, p1); - float contourLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a; - textureColor = lerp(textureColor, contourStyle.color.rgb, contourLocalAlpha); - localAlpha = max(localAlpha, contourLocalAlpha); - } + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); - // OUTLINE + float distance = nbl::hlsl::numeric_limits::max; + 
nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, psInput.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); - // find sdf of every edge - float triangleAreaTimesTwo; - { - float3 AB = v[0] - v[1]; - float3 AC = v[0] - v[2]; - AB.z = 0.0f; - AC.z = 0.0f; + minDistance = min(minDistance, distance); + } - // TODO: figure out if there is a faster solution - triangleAreaTimesTwo = length(cross(AB, AC)); - } + } - // calculate sdf of every edge as it wasn't stippled - float distances[3]; - for (int i = 0; i < 3; ++i) - { - const uint2 currentEdgePoints = edgePoints[i]; - float3 A = v[currentEdgePoints[0]]; - float3 B = v[currentEdgePoints[1]]; - float3 AB = B - A; - float ABLen = length(AB); - float triangleHeightToOpositeVertex = triangleAreaTimesTwo / ABLen; - distances[i] = triangleHeightToOpositeVertex * baryCoord[opposingVertexIdx[i]]; - } + outputColor.a = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, minDistance) * outlineStyle.color.a; + outputColor.rgb = outlineStyle.color.rgb; - float minDistance = nbl::hlsl::numeric_limits::max; - if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) - { - for (uint i = 0; i < 3; ++i) - distances[i] -= outlineThickness; + return outputColor; +} - minDistance = min(distances[0], min(distances[1], distances[2])); - } - else - { - for (int i = 0; i < 3; ++i) - { - if (distances[i] > outlineThickness) - continue; - - const uint2 currentEdgePoints = edgePoints[i]; - float3 p0 = v[currentEdgePoints[0]]; - float3 p1 = v[currentEdgePoints[1]]; - - // long story short, in order for stipple patterns to be consistent: - // - point with lesser x coord should be starting point - // - if x coord 
of both points are equal then point with lesser y value should be starting point - if (p1.x < p0.x) - nbl::hlsl::swap(p0, p1); - else if (p1.x == p0.x && p1.y < p0.y) - nbl::hlsl::swap(p0, p1); - - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); - - float distance = nbl::hlsl::numeric_limits::max; - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); +struct DTMColorBlender +{ + void init() + { + colorCount = 0; + } - minDistance = min(minDistance, distance); - } + void addColorOnTop(in float4 color) + { + colors[colorCount] = color; + colorCount++; + } - } + float4 blend() + { + if (colorCount == 0) + return float4(0.0f, 0.0f, 0.0f, 1.0f); - float outlineLocalAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, minDistance) * outlineStyle.color.a; - textureColor = lerp(textureColor, outlineStyle.color.rgb, outlineLocalAlpha); - localAlpha = max(localAlpha, outlineLocalAlpha); + float4 outputColor = colors[0]; + for (int i = 1; i < colorCount; ++i) + { + outputColor.rgb = colors[i].rgb * colors[i].a + outputColor.rgb * outputColor.a * (1.0f - colors[i].a); + outputColor.a = colors[i].a + outputColor.a * (1.0f - colors[i].a); } - return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); + return outputColor; + } + + int colorCount; + float4 colors[3]; +}; + +[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] +[shader("pixel")] +float4 fragMain(PSInput input) : SV_TARGET +{ + float localAlpha = 0.0f; + float3 textureColor = float3(0, 0, 0); // color 
sampled from a texture + + ObjectType objType = input.getObjType(); + const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); + const MainObject mainObj = loadMainObject(currentMainObjectIdx); + + if (pc.isDTMRendering) + { + // TRIANGLE RENDERING + { + DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); + + float3 v[3]; + v[0] = input.getScreenSpaceVertexAttribs(0); + v[1] = input.getScreenSpaceVertexAttribs(1); + v[2] = input.getScreenSpaceVertexAttribs(2); + + // indices of points constructing every edge + uint2 edgePoints[3]; + edgePoints[0] = uint2(0, 1); + edgePoints[1] = uint2(1, 2); + edgePoints[2] = uint2(2, 0); + + const float3 baryCoord = calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); + float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; + float heightDeriv = fwidth(height); + + DTMColorBlender blender; + blender.init(); + if(dtm.drawHeightsFlag) + blender.addColorOnTop(calculateDTMHeightColor(dtm, v, heightDeriv, input.position.xy, height)); + if (dtm.drawContoursFlag) + blender.addColorOnTop(calculateDTMContourColor(dtm, v, edgePoints, input, height)); + if (dtm.drawOutlineFlag) + blender.addColorOnTop(calculateDTMOutlineColor(dtm, v, edgePoints, input, baryCoord, height)); + float4 dtmColor = blender.blend(); + + textureColor = dtmColor.rgb; + localAlpha = dtmColor.a; + + return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); + } } else { From 1db627c52e32644bdf276bfcdec4afbf92cf16c8 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 15 Apr 2025 12:46:56 +0330 Subject: [PATCH 044/129] Fix DISCRETE_FIXED_LENGTH_INTERVALS + Refactors --- 62_CAD/CTriangleMesh.h | 31 +- 62_CAD/DrawResourcesFiller.cpp | 22 +- 62_CAD/main.cpp | 46 +- 62_CAD/shaders/globals.hlsl | 48 +- .../main_pipeline/fragment_shader.hlsl | 618 ++++++++++-------- 5 files changed, 418 insertions(+), 347 deletions(-) diff --git a/62_CAD/CTriangleMesh.h 
b/62_CAD/CTriangleMesh.h index c1dcbca68..1860dedc9 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -8,26 +8,33 @@ using namespace nbl; struct DTMSettingsInfo { - enum E_HEIGHT_SHADING_MODE - { - DISCRETE_VARIABLE_LENGTH_INTERVALS, - DISCRETE_FIXED_LENGTH_INTERVALS, - CONTINOUS_INTERVALS - }; - LineStyleInfo outlineLineStyleInfo; LineStyleInfo contourLineStyleInfo; + uint32_t mode; // E_DTM_MODE + float contourLinesStartHeight; float contourLinesEndHeight; float contourLinesHeightInterval; - - float intervalWidth; + + // Height Shading Mode E_HEIGHT_SHADING_MODE heightShadingMode; - bool drawHeightsFlag; - bool drawContoursFlag; - bool drawOutlineFlag; + // Used as fixed interval length for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + float intervalLength; + + // Converts an interval index to its corresponding height value + // For example, if this value is 10.0, then an interval index of 2 corresponds to a height of 20.0. + // This computed height is later used to determine the interpolated color for shading. + // It makes sense for this variable to be always equal to `intervalLength` but sometimes it's a different scaling so that last index corresponds to largestHeight + float intervalIndexToHeightMultiplier; + + // Used for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + // If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the + // first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. + // Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans + // [minHeight, minHeight + intervalLength]. 
+ bool isCenteredShading; void addHeightColorMapEntry(float height, float32_t4 color) { diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c11b0a67f..ad2f160c8 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -632,6 +632,9 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm // TODO: Maybe constraint by a max size? and return InvalidIdx if it would exceed DTMSettings dtmSettings; + + dtmSettings.mode = dtmSettingsInfo.mode; + dtmSettings.contourLinesStartHeight = dtmSettingsInfo.contourLinesStartHeight; dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourLinesEndHeight; dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourLinesHeightInterval; @@ -639,24 +642,23 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineLineStyleInfo); dtmSettings.contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourLineStyleInfo); + switch (dtmSettingsInfo.heightShadingMode) { - case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: - dtmSettings.intervalWidth = std::numeric_limits::infinity(); + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + dtmSettings.intervalLength = std::numeric_limits::infinity(); break; - case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: - dtmSettings.intervalWidth = dtmSettingsInfo.intervalWidth; + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + dtmSettings.intervalLength = dtmSettingsInfo.intervalLength; break; - case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: - dtmSettings.intervalWidth = 0.0f; + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + dtmSettings.intervalLength = 0.0f; break; } + dtmSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.intervalIndexToHeightMultiplier; + dtmSettings.isCenteredShading = 
static_cast(dtmSettingsInfo.isCenteredShading); _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); - dtmSettings.drawHeightsFlag = static_cast(dtmSettingsInfo.drawHeightsFlag); - dtmSettings.drawContoursFlag = static_cast(dtmSettingsInfo.drawContoursFlag); - dtmSettings.drawOutlineFlag = static_cast(dtmSettingsInfo.drawOutlineFlag); - for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) { const DTMSettings& itr = resourcesCollection.dtmSettings.vector[i]; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 48ca4f5a3..16532cba7 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -625,7 +625,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu double m_timeElapsed = 0.0; std::chrono::steady_clock::time_point lastTime; uint32_t m_hatchDebugStep = 0u; - DTMSettingsInfo::E_HEIGHT_SHADING_MODE m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + E_HEIGHT_SHADING_MODE m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; inline bool onAppInitialized(smart_refctd_ptr&& system) override { @@ -1073,15 +1073,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_1) { - m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; } if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_2) { - m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; } if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_3) { - 
m_shadingModeExample = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + m_shadingModeExample = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; } } } @@ -3246,6 +3246,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmSettingsInfo; + + dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING; + dtmSettingsInfo.contourLinesStartHeight = 20; dtmSettingsInfo.contourLinesEndHeight = 90; dtmSettingsInfo.contourLinesHeightInterval = 10; @@ -3264,9 +3267,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); - dtmSettingsInfo.drawHeightsFlag = true; - dtmSettingsInfo.drawContoursFlag = true; - dtmSettingsInfo.drawOutlineFlag = true; // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS @@ -3275,9 +3275,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; switch (m_shadingModeExample) { - case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: { - dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, animatedAlpha)); dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); @@ -3286,21 +3286,27 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } - case 
DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: { - dtmSettingsInfo.intervalWidth = 8.0f; - dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.intervalLength = 10.0f; + dtmSettingsInfo.intervalIndexToHeightMultiplier = dtmSettingsInfo.intervalLength; + dtmSettingsInfo.isCenteredShading = false; + dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0)); + dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0)); dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } - case DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { - dtmSettingsInfo.heightShadingMode = DTMSettingsInfo::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(30.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0)); + 
dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0)); + dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } } @@ -3311,7 +3317,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); for (auto& v : mesh.m_vertices) { - v.pos += float64_t2(400.0, 200.0); + v.pos += float64_t2(450.0, 200.0); v.height -= 10.0; } diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index bf4c06db7..a83acb094 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -354,15 +354,28 @@ struct LineStyle } }; +enum E_DTM_MODE +{ + OUTLINE = 1 << 0, + CONTOUR = 1 << 1, + HEIGHT_SHADING = 1 << 2, +}; + +enum class E_HEIGHT_SHADING_MODE : uint32_t +{ + DISCRETE_VARIABLE_LENGTH_INTERVALS, + DISCRETE_FIXED_LENGTH_INTERVALS, + CONTINOUS_INTERVALS +}; + +// Documentation and explanation of variables in DTMSettingsInfo struct DTMSettings { const static uint32_t HeightColorMapMaxEntries = 16u; uint32_t outlineLineStyleIdx; // index into line styles uint32_t contourLineStyleIdx; // index into line styles - int drawHeightsFlag; - int drawContoursFlag; - int drawOutlineFlag; + uint32_t mode; // E_DTM_MODE // contour lines float contourLinesStartHeight; @@ -370,27 +383,26 @@ struct DTMSettings float contourLinesHeightInterval; // height-color map - float intervalWidth; + float intervalLength; + float intervalIndexToHeightMultiplier; + int isCenteredShading; + uint32_t heightColorEntryCount; float heightColorMapHeights[HeightColorMapMaxEntries]; float32_t4 heightColorMapColors[HeightColorMapMaxEntries]; - - enum E_HEIGHT_SHADING_MODE - { - DISCRETE_VARIABLE_LENGTH_INTERVALS, - DISCRETE_FIXED_LENGTH_INTERVALS, - CONTINOUS_INTERVALS - }; - + E_HEIGHT_SHADING_MODE determineHeightShadingMode() { - if (nbl::hlsl::isinf(intervalWidth)) - return DISCRETE_VARIABLE_LENGTH_INTERVALS; - if 
(intervalWidth == 0.0f) - return CONTINOUS_INTERVALS; - - return DISCRETE_FIXED_LENGTH_INTERVALS; + if (nbl::hlsl::isinf(intervalLength)) + return E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + if (intervalLength == 0.0f) + return E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; } + + bool drawOutlineEnabled() { return (mode & E_DTM_MODE::OUTLINE) != 0u; } + bool drawContourEnabled() { return (mode & E_DTM_MODE::CONTOUR) != 0u; } + bool drawHeightShadingEnabled() { return (mode & E_DTM_MODE::HEIGHT_SHADING) != 0u; } }; #ifndef __HLSL_VERSION diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index be5e0bf07..dc5262568 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -420,22 +420,80 @@ float dot2(in float2 vec) return dot(vec, vec); } -struct DTMHeightShadingAAInfo + +// TODO: Later move these functions and structs to dtmSettings.hlsl and a namespace like dtmSettings::height_shading or dtmSettings::contours, etc.. + +struct HeightSegmentTransitionData { float currentHeight; float4 currentSegmentColor; - float nearestSegmentHeight; - float4 nearestSegmentColor; + float boundaryHeight; + float4 otherSegmentColor; }; -void calculateBetweenHeightShadingRegionsAntiAliasing(in DTMSettings dtm, in DTMHeightShadingAAInfo aaInfo, in float heightDeriv, out float4 outputColor) +// NOTE[Erfan to Przemek][REMOVE WHEN READ]: I renamed to `smoothHeightSegmentTransition` and made it return value instead of take `out` param + removed applying it to final output color (it's responsibility of the caller now) +// Now the resposibility of this function is just to "Figure out what the interpolated color at the transition is." and doesn't assume how it's gonna be applied to the final color +// that's more predictible and atomic. 
Additionally I think `out` functions make the code a little bit more unreadable as well + +// This function interpolates between the current and nearest segment colors based on the +// screen-space distance to the segment boundary. The result is a smoothly blended color +// useful for visualizing discrete height levels without harsh edges. +float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv) { - float pxDistanceToNearestSegment = abs(aaInfo.currentHeight - aaInfo.nearestSegmentHeight) / heightDeriv; + float pxDistanceToNearestSegment = abs((transitionInfo.currentHeight - transitionInfo.boundaryHeight) / heightDeriv); float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); - float4 localHeightColor = lerp(aaInfo.nearestSegmentColor, aaInfo.currentSegmentColor, nearestSegmentColorCoverage); + float4 localHeightColor = lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, nearestSegmentColorCoverage); + return localHeightColor; +} - outputColor.a *= localHeightColor.a; - outputColor.rgb = localHeightColor.rgb; +// Computes the continuous position of a height value within uniform intervals. +// flooring this value will give the interval index +// +// If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the +// first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. +// Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans +// [minHeight, minHeight + intervalLength]. +// +// Parameters: +// - height: The height value to classify. +// - minHeight: The reference starting height for interval calculation. +// - intervalLength: The length of each interval segment. +// - isCenteredShading: Whether to center the shading intervals around minHeight. 
+// +// Returns: +// - A float representing the continuous position within the interval grid. +float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading) +{ + if (isCenteredShading) + return ( (height - minHeight) / intervalLength + 0.5f); + else + return ( (height - minHeight) / intervalLength ); +} + +void getIntervalHeightAndColor(in int intervalIndex, in DTMSettings dtmSettings, out float4 outIntervalColor, out float outIntervalHeight) +{ + float minShadingHeight = dtmSettings.heightColorMapHeights[0]; + outIntervalHeight = minShadingHeight + float(intervalIndex) * dtmSettings.intervalIndexToHeightMultiplier; + + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtmSettings }; + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, dtmSettings.heightColorEntryCount, outIntervalHeight), dtmSettings.heightColorEntryCount-1u); + uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); + + float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + + if (upperBoundHeight == lowerBoundHeight) + { + outIntervalColor = upperBoundColor; + } + else + { + float interpolationVal = (outIntervalHeight - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + outIntervalColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + } } float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) @@ -447,14 +505,14 @@ float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3 return float3(u, v, w); } -float4 calculateDTMHeightColor(in DTMSettings dtm, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) +float4 
calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) { float4 outputColor = float4(0.0f, 0.0f, 0.0f, 1.0f); // HEIGHT SHADING - const uint32_t heightMapSize = dtm.heightColorEntryCount; - float minShadingHeight = dtm.heightColorMapHeights[0]; - float maxShadingHeight = dtm.heightColorMapHeights[heightMapSize - 1]; + const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; + float minShadingHeight = dtmSettings.heightColorMapHeights[0]; + float maxShadingHeight = dtmSettings.heightColorMapHeights[heightMapSize - 1]; if (heightMapSize > 0) { @@ -487,11 +545,11 @@ float4 calculateDTMHeightColor(in DTMSettings dtm, in float3 v[3], in float heig outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); // calculate height color - DTMSettings::E_HEIGHT_SHADING_MODE mode = dtm.determineHeightShadingMode(); + E_HEIGHT_SHADING_MODE mode = dtmSettings.determineHeightShadingMode(); - if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) + if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtmSettings }; int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); int mapIndex = max(upperBoundIndex - 1, 0); int mapIndexPrev = max(mapIndex - 1, 0); @@ -501,72 +559,61 @@ float4 calculateDTMHeightColor(in DTMSettings dtm, in float3 v[3], in float heig // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values bool blendWithPrev = (mapIndex > 0) - && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtm.heightColorMapHeights[upperBoundIndex] + dtm.heightColorMapHeights[mapIndex])); + && (mapIndex >= 
heightMapSize - 1 || (height * 2.0 < dtmSettings.heightColorMapHeights[upperBoundIndex] + dtmSettings.heightColorMapHeights[mapIndex])); - DTMHeightShadingAAInfo aaInfo; - aaInfo.currentHeight = height; - aaInfo.currentSegmentColor = dtm.heightColorMapColors[mapIndex]; - aaInfo.nearestSegmentHeight = blendWithPrev ? dtm.heightColorMapHeights[mapIndex] : dtm.heightColorMapHeights[mapIndexNext]; - aaInfo.nearestSegmentColor = blendWithPrev ? dtm.heightColorMapColors[mapIndexPrev] : dtm.heightColorMapColors[mapIndexNext]; + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = dtmSettings.heightColorMapColors[mapIndex]; + transitionInfo.boundaryHeight = blendWithPrev ? dtmSettings.heightColorMapHeights[mapIndex] : dtmSettings.heightColorMapHeights[mapIndexNext]; + transitionInfo.otherSegmentColor = blendWithPrev ? dtmSettings.heightColorMapColors[mapIndexPrev] : dtmSettings.heightColorMapColors[mapIndexNext]; - calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, heightDeriv, outputColor); + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; } - else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + else if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) { - float interval = dtm.intervalWidth; - float heightMinShadingHeightDiff = (height - minShadingHeight); - int sectionIndex = int(heightMinShadingHeightDiff / interval); - float heightTmp = minShadingHeight + float(sectionIndex) * interval; - - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; - uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); - - float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = 
dtm.heightColorMapHeights[lowerBoundHeightIndex]; - - float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; - - float interpolationVal; - bool blendWithPrev; - if (upperBoundHeightIndex == 0) - { - interpolationVal = 1.0f; // TODO: investigate if it is correct - blendWithPrev = false; - } - else - { - interpolationVal = (heightTmp - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - blendWithPrev = height - interval * sectionIndex < 0.5f; // TODO: investigate if it is correct - } + float intervalPosition = getIntervalPosition(height, minShadingHeight, dtmSettings.intervalLength, dtmSettings.isCenteredShading); + float positionWithinInterval = frac(intervalPosition); + int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); - DTMHeightShadingAAInfo aaInfo; - aaInfo.currentHeight = height; - aaInfo.currentSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + float4 currentIntervalColor; + float currentIntervalHeight; + getIntervalHeightAndColor(intervalIndex, dtmSettings, currentIntervalColor, currentIntervalHeight); + + bool blendWithPrev = (positionWithinInterval < 0.5f); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = currentIntervalColor; if (blendWithPrev) { - aaInfo.nearestSegmentHeight = heightTmp; - aaInfo.nearestSegmentColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal - 1.0f / interval); + int prevIntervalIdx = max(intervalIndex - 1, 0); + float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev + getIntervalHeightAndColor(prevIntervalIdx, dtmSettings, transitionInfo.otherSegmentColor, prevIntervalHeight); + transitionInfo.boundaryHeight = currentIntervalHeight; } else { - aaInfo.nearestSegmentHeight = heightTmp + interval; - aaInfo.nearestSegmentColor = lerp(lowerBoundColor, 
upperBoundColor, interpolationVal + 1.0f / interval); + int nextIntervalIdx = intervalIndex + 1; + getIntervalHeightAndColor(nextIntervalIdx, dtmSettings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); } - calculateBetweenHeightShadingRegionsAntiAliasing(dtm, aaInfo, heightDeriv, outputColor); + + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; } - else if (mode == DTMSettings::E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + else if (mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtm }; + DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtmSettings }; uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1; - float upperBoundHeight = dtm.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtm.heightColorMapHeights[lowerBoundHeightIndex]; + float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; - float4 upperBoundColor = dtm.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtm.heightColorMapColors[lowerBoundHeightIndex]; + float4 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; float interpolationVal; if (upperBoundHeightIndex == 0) @@ -584,20 +631,20 @@ float4 calculateDTMHeightColor(in DTMSettings dtm, in float3 v[3], in float heig return outputColor; } -float4 calculateDTMContourColor(in DTMSettings dtm, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float height) +float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 
v[3], in uint2 edgePoints[3], in PSInput psInput, in float height) { float4 outputColor; - LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); + LineStyle contourStyle = loadLineStyle(dtmSettings.contourLineStyleIdx); const float contourThickness = psInput.getContourLineThickness(); float stretch = 1.0f; float phaseShift = 0.0f; const float worldToScreenRatio = psInput.getCurrentWorldToScreenRatio(); // TODO: move to ubo or push constants - const float startHeight = dtm.contourLinesStartHeight; - const float endHeight = dtm.contourLinesEndHeight; - const float interval = dtm.contourLinesHeightInterval; + const float startHeight = dtmSettings.contourLinesStartHeight; + const float endHeight = dtmSettings.contourLinesEndHeight; + const float interval = dtmSettings.contourLinesHeightInterval; // TODO: can be precomputed const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; @@ -660,11 +707,11 @@ float4 calculateDTMContourColor(in DTMSettings dtm, in float3 v[3], in uint2 edg return outputColor; } -float4 calculateDTMOutlineColor(in DTMSettings dtm, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float3 baryCoord, in float height) +float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float3 baryCoord, in float height) { float4 outputColor; - LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); + LineStyle outlineStyle = loadLineStyle(dtmSettings.outlineLineStyleIdx); const float outlineThickness = psInput.getOutlineThickness(); const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); const float worldToScreenRatio = psInput.getCurrentWorldToScreenRatio(); @@ -792,270 +839,267 @@ float4 fragMain(PSInput input) : SV_TARGET if (pc.isDTMRendering) { - // TRIANGLE RENDERING - { - DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); - - float3 v[3]; - v[0] = input.getScreenSpaceVertexAttribs(0); - v[1] = 
input.getScreenSpaceVertexAttribs(1); - v[2] = input.getScreenSpaceVertexAttribs(2); - - // indices of points constructing every edge - uint2 edgePoints[3]; - edgePoints[0] = uint2(0, 1); - edgePoints[1] = uint2(1, 2); - edgePoints[2] = uint2(2, 0); - - const float3 baryCoord = calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); - float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; - float heightDeriv = fwidth(height); - - DTMColorBlender blender; - blender.init(); - if(dtm.drawHeightsFlag) - blender.addColorOnTop(calculateDTMHeightColor(dtm, v, heightDeriv, input.position.xy, height)); - if (dtm.drawContoursFlag) - blender.addColorOnTop(calculateDTMContourColor(dtm, v, edgePoints, input, height)); - if (dtm.drawOutlineFlag) - blender.addColorOnTop(calculateDTMOutlineColor(dtm, v, edgePoints, input, baryCoord, height)); - float4 dtmColor = blender.blend(); - - textureColor = dtmColor.rgb; - localAlpha = dtmColor.a; - - return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); - } + DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + float3 v[3]; + v[0] = input.getScreenSpaceVertexAttribs(0); + v[1] = input.getScreenSpaceVertexAttribs(1); + v[2] = input.getScreenSpaceVertexAttribs(2); + + // indices of points constructing every edge + uint2 edgePoints[3]; + edgePoints[0] = uint2(0, 1); + edgePoints[1] = uint2(1, 2); + edgePoints[2] = uint2(2, 0); + + const float3 baryCoord = calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); + float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; + float heightDeriv = fwidth(height); + + DTMColorBlender blender; + blender.init(); + if(dtmSettings.drawHeightShadingEnabled()) + blender.addColorOnTop(calculateDTMHeightColor(dtmSettings, v, heightDeriv, input.position.xy, height)); + if (dtmSettings.drawContourEnabled()) + 
blender.addColorOnTop(calculateDTMContourColor(dtmSettings, v, edgePoints, input, height)); + if (dtmSettings.drawOutlineEnabled()) + blender.addColorOnTop(calculateDTMOutlineColor(dtmSettings, v, edgePoints, input, baryCoord, height)); + float4 dtmColor = blender.blend(); + + textureColor = dtmColor.rgb; + localAlpha = dtmColor.a; + + return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); } else { // figure out local alpha with sdf if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) - { - float distance = nbl::hlsl::numeric_limits::max; - if (objType == ObjectType::LINE) { - const float2 start = input.getLineStart(); - const float2 end = input.getLineEnd(); - const uint32_t styleIdx = mainObj.styleIdx; - const float thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); + float distance = nbl::hlsl::numeric_limits::max; + if (objType == ObjectType::LINE) + { + const float2 start = input.getLineStart(); + const float2 end = input.getLineEnd(); + const uint32_t styleIdx = mainObj.styleIdx; + const float thickness = input.getLineThickness(); + const float phaseShift = input.getCurrentPhaseShift(); + const float stretch = input.getPatternStretch(); + const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); - LineStyle style = loadLineStyle(styleIdx); + LineStyle style = loadLineStyle(styleIdx); - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); + if 
(!style.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); + } + else + { + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + } } - else + else if (objType == ObjectType::QUAD_BEZIER) { - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::QUAD_BEZIER) - { - nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic(); - nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator(); + nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic(); + nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator(); - const uint32_t styleIdx = mainObj.styleIdx; - const float thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); + const uint32_t styleIdx = mainObj.styleIdx; + const float thickness = input.getLineThickness(); + const float phaseShift = input.getCurrentPhaseShift(); + const float stretch = input.getPatternStretch(); + const 
float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - LineStyle style = loadLineStyle(styleIdx); - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); + LineStyle style = loadLineStyle(styleIdx); + if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); + } + else + { + BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + } } - else + else if (objType == ObjectType::POLYLINE_CONNECTOR) { - BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::POLYLINE_CONNECTOR) - { - const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter(); - distance = miterSDF( - P, - input.getLineThickness(), - input.getPolylineConnectorTrapezoidStart(), - input.getPolylineConnectorTrapezoidEnd(), - input.getPolylineConnectorTrapezoidLongBase(), - input.getPolylineConnectorTrapezoidShortBase()); + const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter(); + distance = miterSDF( + P, + input.getLineThickness(), + input.getPolylineConnectorTrapezoidStart(), + input.getPolylineConnectorTrapezoidEnd(), + input.getPolylineConnectorTrapezoidLongBase(), + input.getPolylineConnectorTrapezoidShortBase()); + } + localAlpha = 
smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); } - localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); - } else if (objType == ObjectType::CURVE_BOX) - { - const float minorBBoxUV = input.getMinorBBoxUV(); - const float majorBBoxUV = input.getMajorBBoxUV(); + { + const float minorBBoxUV = input.getMinorBBoxUV(); + const float majorBBoxUV = input.getMajorBBoxUV(); - nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor(); - nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor(); - nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor(); - nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor(); + nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor(); + nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor(); + nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor(); + nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor(); - // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel. - nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); + // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel. 
+ nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); + nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0); - const float minEv = curveMinMinor.evaluate(minT); + const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0); + const float minEv = curveMinMinor.evaluate(minT); - const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0); - const float maxEv = curveMaxMinor.evaluate(maxT); + const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0); + const float maxEv = curveMaxMinor.evaluate(maxT); - const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0; - const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv; + const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0; + const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv; - if (insideMinor && insideMajor) - { - localAlpha = 1.0; - } - else - { - // Find the true SDF of a hatch box boundary which is bounded by two curves, It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction) - // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor - const float InvalidT = nbl::hlsl::numeric_limits::max; - const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max; + if (insideMinor && insideMajor) + { + localAlpha = 1.0; + } + else + { + // Find the true SDF of a hatch box boundary which is bounded by two curves, 
It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction) + // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor + const float InvalidT = nbl::hlsl::numeric_limits::max; + const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max; - const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize(); + const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize(); - float closestDistanceSquared = MAX_DISTANCE_SQUARED; - const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize; + float closestDistanceSquared = MAX_DISTANCE_SQUARED; + const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize; - if (minorBBoxUV < minEv) - { - // DO SDF of Min Curve - nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize, - float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize, - float2(curveMinMinor.c, curveMinMajor.c) * boxScreenSpaceSize); - - nbl::hlsl::shapes::Quadratic::Candidates candidates = minCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + if (minorBBoxUV < minEv) { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = minCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; + // DO SDF of Min Curve + nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct( + float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize, + float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize, + float2(curveMinMinor.c, curveMinMajor.c) * 
boxScreenSpaceSize); + + nbl::hlsl::shapes::Quadratic::Candidates candidates = minCurve.getClosestCandidates(pos); + [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] + for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + { + candidates[i] = clamp(candidates[i], 0.0, 1.0); + const float2 distVector = minCurve.evaluate(candidates[i]) - pos; + const float candidateDistanceSquared = dot(distVector, distVector); + if (candidateDistanceSquared < closestDistanceSquared) + closestDistanceSquared = candidateDistanceSquared; + } } - } - else if (minorBBoxUV > maxEv) - { - // Do SDF of Max Curve - nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize, - float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize, - float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize); - nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + else if (minorBBoxUV > maxEv) { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = maxCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; + // Do SDF of Max Curve + nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct( + float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize, + float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize, + float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize); + nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos); + [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] + for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + { + 
candidates[i] = clamp(candidates[i], 0.0, 1.0); + const float2 distVector = maxCurve.evaluate(candidates[i]) - pos; + const float candidateDistanceSquared = dot(distVector, distVector); + if (candidateDistanceSquared < closestDistanceSquared) + closestDistanceSquared = candidateDistanceSquared; + } } - } - if (!insideMajor) - { - const bool minLessThanMax = minEv < maxEv; - float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED); - if (majorBBoxUV > 1.0) + if (!insideMajor) { - const float2 minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize); - else - majorDistVector = pos - minCurveEnd; - } - else - { - const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize); + const bool minLessThanMax = minEv < maxEv; + float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED); + if (majorBBoxUV > 1.0) + { + const float2 minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize; + if (minLessThanMax) + majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize); + else + majorDistVector = pos - minCurveEnd; + } else - majorDistVector = pos - minCurveStart; + { + const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize; + if (minLessThanMax) + majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize); + else + majorDistVector = pos - minCurveStart; + } + + const float majorDistSq = dot(majorDistVector, majorDistVector); + if (majorDistSq < closestDistanceSquared) + closestDistanceSquared = majorDistSq; } - const float majorDistSq = dot(majorDistVector, majorDistVector); - if (majorDistSq < closestDistanceSquared) - closestDistanceSquared = majorDistSq; + const float dist = sqrt(closestDistanceSquared); + localAlpha = 1.0f - 
smoothstep(0.0, globals.antiAliasingFactor, dist); } - const float dist = sqrt(closestDistanceSquared); - localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist); - } - - LineStyle style = loadLineStyle(mainObj.styleIdx); - uint32_t textureId = asuint(style.screenSpaceLineWidth); - if (textureId != InvalidTextureIdx) - { - // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected - // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); - localAlpha *= smoothstep(+globals.antiAliasingFactor / 2.0, -globals.antiAliasingFactor / 2.0f, msdf); + LineStyle style = loadLineStyle(mainObj.styleIdx); + uint32_t textureId = asuint(style.screenSpaceLineWidth); + if (textureId != InvalidTextureIdx) + { + // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected + // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH + float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; + float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); + localAlpha *= smoothstep(+globals.antiAliasingFactor / 2.0, -globals.antiAliasingFactor / 2.0f, msdf); + } } - } else if (objType == ObjectType::FONT_GLYPH) - { - const float2 uv = input.getFontGlyphUV(); - const uint32_t textureId = input.getFontGlyphTextureId(); - - if (textureId != InvalidTextureIdx) { - float mipLevel = 
msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange()); - /* - explaining "*= exp2(max(mipLevel,0.0))" - Each mip level has constant MSDFPixelRange - Which essentially makes the msdfSamples here (Harware Sampled) have different scales per mip - As we go up 1 mip level, the msdf distance should be multiplied by 2.0 - While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled. - It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice! + const float2 uv = input.getFontGlyphUV(); + const uint32_t textureId = input.getFontGlyphTextureId(); + + if (textureId != InvalidTextureIdx) + { + float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); + float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); + float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange()); + /* + explaining "*= exp2(max(mipLevel,0.0))" + Each mip level has constant MSDFPixelRange + Which essentially makes the msdfSamples here (Harware Sampled) have different scales per mip + As we go up 1 mip level, the msdf distance should be multiplied by 2.0 + While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled. + It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice! - Alternatively you can think of it as doing this instead: - localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf); - Which is reducing the aa feathering as we go up the mip levels. 
- to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph - */ - msdf *= exp2(max(mipLevel,0.0)); + Alternatively you can think of it as doing this instead: + localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf); + Which is reducing the aa feathering as we go up the mip levels. + to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph + */ + msdf *= exp2(max(mipLevel,0.0)); - LineStyle style = loadLineStyle(mainObj.styleIdx); - const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; - const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle - localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf); + LineStyle style = loadLineStyle(mainObj.styleIdx); + const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; + const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle + localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf); + } } - } else if (objType == ObjectType::IMAGE) - { - const float2 uv = input.getImageUV(); - const uint32_t textureId = input.getImageTextureId(); - - if (textureId != InvalidTextureIdx) { - float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); - textureColor = colorSample.rgb; - localAlpha = colorSample.a; + const float2 uv = input.getImageUV(); + const uint32_t textureId = input.getImageTextureId(); + + if (textureId != InvalidTextureIdx) + { + float4 colorSample = 
textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); + textureColor = colorSample.rgb; + localAlpha = colorSample.a; + } } - } uint2 fragCoord = uint2(input.position.xy); From 432b931caa1192bf6982178af4a0c9686d7222ba Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 15 Apr 2025 12:55:45 +0330 Subject: [PATCH 045/129] clear function for Mesh --- 62_CAD/CTriangleMesh.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 1860dedc9..1753687b2 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -110,6 +110,12 @@ class CTriangleMesh final { return m_indices.size(); } + + inline void clear() + { + m_vertices.clear(); + m_indices.clear(); + } core::vector m_vertices; core::vector m_indices; From 88dcf44da0465b3e04a7255ba26a67fa342b5552 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 15 Apr 2025 13:04:24 +0200 Subject: [PATCH 046/129] Fixed non-stippled lines --- 62_CAD/main.cpp | 28 ++++---- .../main_pipeline/fragment_shader.hlsl | 71 +++++++------------ 2 files changed, 41 insertions(+), 58 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 16532cba7..c16f17c2d 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3247,7 +3247,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu DTMSettingsInfo dtmSettingsInfo; - dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING; + dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; dtmSettingsInfo.contourLinesStartHeight = 20; dtmSettingsInfo.contourLinesEndHeight = 90; @@ -3256,9 +3256,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu LineStyleInfo outlineStyle = {}; dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 0.0f; dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 3.0f; - dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 0.5f); - std::array 
outlineStipplePattern = { 0.0f, -5.0f, 2.0f, -5.0f }; - dtmSettingsInfo.outlineLineStyleInfo.setStipplePatternData(outlineStipplePattern); + dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + //std::array outlineStipplePattern = { 0.0f, -5.0f, 2.0f, -5.0f }; + //dtmSettingsInfo.outlineLineStyleInfo.setStipplePatternData(outlineStipplePattern); LineStyleInfo contourStyle = {}; dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; @@ -3292,21 +3292,21 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmSettingsInfo.intervalIndexToHeightMultiplier = dtmSettingsInfo.intervalLength; dtmSettingsInfo.isCenteredShading = false; dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0)); - dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0)); - dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0)); - dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0)); 
- dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0)); - dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0)); - dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } } diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index dc5262568..d7a9493b2 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -431,10 +431,6 @@ struct HeightSegmentTransitionData float4 otherSegmentColor; }; -// NOTE[Erfan to Przemek][REMOVE WHEN READ]: I renamed to `smoothHeightSegmentTransition` and made it return value instead of take `out` param + removed applying it to final output color (it's responsibility of the caller now) -// Now the resposibility of this function is just to "Figure out what the interpolated color at the transition is." and doesn't assume how it's gonna be applied to the final color -// that's more predictible and atomic. Additionally I think `out` functions make the code a little bit more unreadable as well - // This function interpolates between the current and nearest segment colors based on the // screen-space distance to the segment boundary. The result is a smoothly blended color // useful for visualizing discrete height levels without harsh edges. 
@@ -682,6 +678,7 @@ float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 v[3], in u } } + if(contourLinePointsIdx == 2) { nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); @@ -746,16 +743,27 @@ float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in u float ABLen = length(AB); float triangleHeightToOpositeVertex = triangleAreaTimesTwo / ABLen; - distances[i] = triangleHeightToOpositeVertex * baryCoord[opposingVertexIdx[i]]; + distances[i] = abs(triangleHeightToOpositeVertex * baryCoord[opposingVertexIdx[i]]); } float minDistance = nbl::hlsl::numeric_limits::max; if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) { - for (uint i = 0; i < 3; ++i) - distances[i] -= outlineThickness; + for (int i = 0; i < 3; ++i) + { + if (distances[i] > outlineThickness) + continue; - minDistance = min(distances[0], min(distances[1], distances[2])); + const uint2 currentEdgePoints = edgePoints[i]; + float3 p0 = v[currentEdgePoints[0]]; + float3 p1 = v[currentEdgePoints[1]]; + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + distance = ClippedSignedDistance >::sdf(lineSegment, psInput.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag); + + minDistance = min(minDistance, distance); + } } else { @@ -794,37 +802,14 @@ float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in u return outputColor; } -struct DTMColorBlender +float4 blendColorOnTop(in float4 colorBelow, in float4 colorAbove) { - void init() - { - colorCount = 0; - } - - void addColorOnTop(in float4 color) - { - colors[colorCount] = color; - colorCount++; - } - - float4 blend() - { - if (colorCount == 0) - return float4(0.0f, 0.0f, 0.0f, 1.0f); - - float4 outputColor = colors[0]; - for (int i = 1; i < colorCount; ++i) - { - outputColor.rgb = 
colors[i].rgb * colors[i].a + outputColor.rgb * outputColor.a * (1.0f - colors[i].a); - outputColor.a = colors[i].a + outputColor.a * (1.0f - colors[i].a); - } - - return outputColor; - } + float4 outputColor = colorBelow; + outputColor.rgb = colorAbove.rgb * colorAbove.a + outputColor.rgb * outputColor.a * (1.0f - colorAbove.a); + outputColor.a = colorAbove.a + outputColor.a * (1.0f - colorAbove.a); - int colorCount; - float4 colors[3]; -}; + return outputColor; +} [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] @@ -856,15 +841,13 @@ float4 fragMain(PSInput input) : SV_TARGET float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; float heightDeriv = fwidth(height); - DTMColorBlender blender; - blender.init(); - if(dtmSettings.drawHeightShadingEnabled()) - blender.addColorOnTop(calculateDTMHeightColor(dtmSettings, v, heightDeriv, input.position.xy, height)); + float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dtmSettings.drawHeightShadingEnabled()) + dtmColor = blendColorOnTop(dtmColor, calculateDTMHeightColor(dtmSettings, v, heightDeriv, input.position.xy, height)); if (dtmSettings.drawContourEnabled()) - blender.addColorOnTop(calculateDTMContourColor(dtmSettings, v, edgePoints, input, height)); + dtmColor = blendColorOnTop(dtmColor, calculateDTMContourColor(dtmSettings, v, edgePoints, input, height)); if (dtmSettings.drawOutlineEnabled()) - blender.addColorOnTop(calculateDTMOutlineColor(dtmSettings, v, edgePoints, input, baryCoord, height)); - float4 dtmColor = blender.blend(); + dtmColor = blendColorOnTop(dtmColor, calculateDTMOutlineColor(dtmSettings, v, edgePoints, input, baryCoord, height)); textureColor = dtmColor.rgb; localAlpha = dtmColor.a; From 906443947fb830c1d1d75ed48f3d47e97b6b335c Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 15 Apr 2025 14:44:04 +0330 Subject: [PATCH 047/129] getIntervalHeightAndColor small fix --- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 11 
++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index d7a9493b2..f9cd52ec3 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -469,10 +469,15 @@ float getIntervalPosition(in float height, in float minHeight, in float interval void getIntervalHeightAndColor(in int intervalIndex, in DTMSettings dtmSettings, out float4 outIntervalColor, out float outIntervalHeight) { float minShadingHeight = dtmSettings.heightColorMapHeights[0]; - outIntervalHeight = minShadingHeight + float(intervalIndex) * dtmSettings.intervalIndexToHeightMultiplier; + float heightForColor = minShadingHeight + float(intervalIndex) * dtmSettings.intervalIndexToHeightMultiplier; + + if (dtmSettings.isCenteredShading) + outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * dtmSettings.intervalLength; + else + outIntervalHeight = minShadingHeight + (float(intervalIndex)) * dtmSettings.intervalLength; DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtmSettings }; - uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, dtmSettings.heightColorEntryCount, outIntervalHeight), dtmSettings.heightColorEntryCount-1u); + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, dtmSettings.heightColorEntryCount, heightForColor), dtmSettings.heightColorEntryCount-1u); uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; @@ -487,7 +492,7 @@ void getIntervalHeightAndColor(in int intervalIndex, in DTMSettings dtmSettings, } else { - float interpolationVal = (outIntervalHeight - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); outIntervalColor = 
lerp(lowerBoundColor, upperBoundColor, interpolationVal); } } From 9707b33b35b9f2cf4ddacecd64d55434144de7b3 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 15 Apr 2025 15:43:44 +0200 Subject: [PATCH 048/129] Separated DTMSettingsInfo struct --- 62_CAD/CTriangleMesh.h | 39 ++++-- 62_CAD/DrawResourcesFiller.cpp | 71 +++++++---- 62_CAD/DrawResourcesFiller.h | 9 +- 62_CAD/main.cpp | 114 +++++++++++------- .../shaders/main_pipeline/vertex_shader.hlsl | 1 - 5 files changed, 149 insertions(+), 85 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 1753687b2..0740cf114 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -6,17 +6,9 @@ using namespace nbl; -struct DTMSettingsInfo +struct DTMHeightShadingInfo { - LineStyleInfo outlineLineStyleInfo; - LineStyleInfo contourLineStyleInfo; - - uint32_t mode; // E_DTM_MODE - - float contourLinesStartHeight; - float contourLinesEndHeight; - float contourLinesHeightInterval; - + bool enabled; // Height Shading Mode E_HEIGHT_SHADING_MODE heightShadingMode; @@ -28,7 +20,7 @@ struct DTMSettingsInfo // This computed height is later used to determine the interpolated color for shading. // It makes sense for this variable to be always equal to `intervalLength` but sometimes it's a different scaling so that last index corresponds to largestHeight float intervalIndexToHeightMultiplier; - + // Used for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode // If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the // first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. 
@@ -58,7 +50,7 @@ struct DTMSettingsInfo return true; } - + private: struct HeightColor { @@ -74,6 +66,29 @@ struct DTMSettingsInfo std::set heightColorSet; }; +struct DTMContourInfo +{ + bool enabled; + LineStyleInfo lineStyleInfo; + + float startHeight; + float endHeight; + float heightInterval; +}; + +struct DTMOutlineInfo +{ + bool enabled; + LineStyleInfo lineStyleInfo; +}; + +struct DTMSettingsInfo +{ + DTMHeightShadingInfo heightShadingInfo; + DTMContourInfo contourInfo; + DTMOutlineInfo outlineInfo; +}; + class CTriangleMesh final { public: diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index ad2f160c8..4085b4d30 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -134,11 +134,16 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedS } } -void DrawResourcesFiller::drawTriangleMesh(const CTriangleMesh& mesh, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMHeightShadingInfo& dtmHeightShadingInfo, + const DTMContourInfo& dtmContourInfo, + const DTMOutlineInfo& dtmOutlineInfo, + SIntendedSubmitInfo& intendedNextSubmit) { flushDrawObjects(); // flushes draw call construction of any possible draw objects before dtm, because currently we're sepaerating dtm draw calls from drawObj draw calls - setActiveDTMSettings(dtmSettingsInfo); + setActiveDTMSettings(dtmHeightShadingInfo, dtmContourInfo, dtmOutlineInfo); // TODO !!!! 
beginMainObject(MainObjectType::DTM); DrawCallData drawCallData = {}; @@ -352,9 +357,14 @@ void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) activeLineStyleIndex = InvalidStyleIdx; } -void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettings) +void DrawResourcesFiller::setActiveDTMSettings(const DTMHeightShadingInfo& heightShadingInfo, const DTMContourInfo& contourInfo, const DTMOutlineInfo& outlineInfo) { - activeDTMSettings = dtmSettings; + DTMSettingsInfo dtmSettingsInfo; + dtmSettingsInfo.heightShadingInfo = heightShadingInfo; + dtmSettingsInfo.contourInfo = contourInfo; + dtmSettingsInfo.outlineInfo = outlineInfo; + + activeDTMSettings = dtmSettingsInfo; activeDTMSettingsIndex = InvalidDTMSettingsIdx; } @@ -633,31 +643,42 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm DTMSettings dtmSettings; - dtmSettings.mode = dtmSettingsInfo.mode; - - dtmSettings.contourLinesStartHeight = dtmSettingsInfo.contourLinesStartHeight; - dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourLinesEndHeight; - dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourLinesHeightInterval; - - dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineLineStyleInfo); - dtmSettings.contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourLineStyleInfo); + ////dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + dtmSettings.mode = 0u; + if (dtmSettingsInfo.heightShadingInfo.enabled) + { + dtmSettings.mode |= E_DTM_MODE::HEIGHT_SHADING; - switch (dtmSettingsInfo.heightShadingMode) + switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + dtmSettings.intervalLength = std::numeric_limits::infinity(); + break; + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + dtmSettings.intervalLength = 
dtmSettingsInfo.heightShadingInfo.intervalLength; + break; + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + dtmSettings.intervalLength = 0.0f; + break; + } + dtmSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier; + dtmSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); + _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); + } + if (dtmSettingsInfo.contourInfo.enabled) + { + dtmSettings.mode |= E_DTM_MODE::CONTOUR; + dtmSettings.contourLinesStartHeight = dtmSettingsInfo.contourInfo.startHeight; + dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourInfo.endHeight; + dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourInfo.heightInterval; + dtmSettings.contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourInfo.lineStyleInfo); + } + if (dtmSettingsInfo.outlineInfo.enabled) { - case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: - dtmSettings.intervalLength = std::numeric_limits::infinity(); - break; - case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: - dtmSettings.intervalLength = dtmSettingsInfo.intervalLength; - break; - case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: - dtmSettings.intervalLength = 0.0f; - break; + dtmSettings.mode |= E_DTM_MODE::OUTLINE; + dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineInfo.lineStyleInfo); } - dtmSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.intervalIndexToHeightMultiplier; - dtmSettings.isCenteredShading = static_cast(dtmSettingsInfo.isCenteredShading); - _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) { diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 846046a43..f0618fd27 100644 --- a/62_CAD/DrawResourcesFiller.h +++ 
b/62_CAD/DrawResourcesFiller.h @@ -155,7 +155,12 @@ struct DrawResourcesFiller /// WARNING: make sure this function is called within begin/endMainObject scope void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); - void drawTriangleMesh(const CTriangleMesh& mesh, const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + void drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMHeightShadingInfo& dtmHeightShadingInfo, + const DTMContourInfo& dtmContourInfo, + const DTMOutlineInfo& dtmOutlineInfo, + SIntendedSubmitInfo& intendedNextSubmit); // ! Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( @@ -223,7 +228,7 @@ struct DrawResourcesFiller // Setting Active Resources: void setActiveLineStyle(const LineStyleInfo& lineStyle); - void setActiveDTMSettings(const DTMSettingsInfo& dtmSettings); + void setActiveDTMSettings(const DTMHeightShadingInfo& heightShadingInfo, const DTMContourInfo& contourInfo, const DTMOutlineInfo& outlineInfo); void beginMainObject(MainObjectType type); void endMainObject(); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index c16f17c2d..da7ceb275 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3245,83 +3245,107 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); - DTMSettingsInfo dtmSettingsInfo; - - dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; - - dtmSettingsInfo.contourLinesStartHeight = 20; - dtmSettingsInfo.contourLinesEndHeight = 90; - dtmSettingsInfo.contourLinesHeightInterval = 10; - - LineStyleInfo outlineStyle = {}; - dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 3.0f; - dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); - //std::array outlineStipplePattern = { 
0.0f, -5.0f, 2.0f, -5.0f }; + // TODO: remove + //DTMSettingsInfo dtmSettingsInfo; + // + ////dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + //dtmSettingsInfo.mode = E_DTM_MODE::CONTOUR; + //dtmSettingsInfo.contourLinesStartHeight = 20; + //dtmSettingsInfo.contourLinesEndHeight = 90; + //dtmSettingsInfo.contourLinesHeightInterval = 10; + + //LineStyleInfo outlineStyle = {}; + //dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 3.0f; + //dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 0.0f; + //dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + //std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; //dtmSettingsInfo.outlineLineStyleInfo.setStipplePatternData(outlineStipplePattern); - LineStyleInfo contourStyle = {}; - dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmSettingsInfo.contourLineStyleInfo.worldSpaceLineWidth = 1.0f; - dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + //LineStyleInfo contourStyle = {}; + //dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; + //dtmSettingsInfo.contourLineStyleInfo.worldSpaceLineWidth = 1.0f; + //dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + //std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; + //dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); + + DTMOutlineInfo outlineInfo; + outlineInfo.enabled = true; + outlineInfo.lineStyleInfo.screenSpaceLineWidth = 3.0f; + outlineInfo.lineStyleInfo.worldSpaceLineWidth = 0.0f; + outlineInfo.lineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; + outlineInfo.lineStyleInfo.setStipplePatternData(outlineStipplePattern); + + DTMContourInfo contourInfo; + contourInfo.enabled = true; + contourInfo.startHeight = 20; + contourInfo.endHeight = 90; + 
contourInfo.heightInterval = 10; + contourInfo.lineStyleInfo.screenSpaceLineWidth = 0.0f; + contourInfo.lineStyleInfo.worldSpaceLineWidth = 1.0f; + contourInfo.lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; - dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); - + contourInfo.lineStyleInfo.setStipplePatternData(contourStipplePattern); // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS // 2 - DISCRETE_FIXED_LENGTH_INTERVALS // 3 - CONTINOUS_INTERVALS float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + DTMHeightShadingInfo heightShadingInfo; + heightShadingInfo.enabled = true; switch (m_shadingModeExample) { case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: { - dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - - dtmSettingsInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); - dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + break; } case 
E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: { - dtmSettingsInfo.intervalLength = 10.0f; - dtmSettingsInfo.intervalIndexToHeightMultiplier = dtmSettingsInfo.intervalLength; - dtmSettingsInfo.isCenteredShading = false; - dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + heightShadingInfo.intervalLength = 10.0f; + heightShadingInfo.intervalIndexToHeightMultiplier = heightShadingInfo.intervalLength; + heightShadingInfo.isCenteredShading = false; + heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + break; } case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { - dtmSettingsInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmSettingsInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - 
dtmSettingsInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmSettingsInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + break; } } - drawResourcesFiller.drawTriangleMesh(mesh, dtmSettingsInfo, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, heightShadingInfo, contourInfo, outlineInfo, intendedNextSubmit); - dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); - dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); + contourInfo.lineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); + outlineInfo.lineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); for (auto& v : mesh.m_vertices) { v.pos += float64_t2(450.0, 200.0); v.height -= 10.0; } - drawResourcesFiller.drawTriangleMesh(mesh, dtmSettingsInfo, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, heightShadingInfo, contourInfo, outlineInfo, intendedNextSubmit); } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 20c29f16a..f726104b5 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -146,7 +146,6 @@ PSInput main(uint vertexID : SV_VertexID) (clipProjectionData.projectionToNDC[0].x * 
_static_cast(globals.resolution.x)))) ); - // TODO: line style of contour line has to be set too! DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); From fdbf20c7d701063ab93a4c00f575d3aceb319700 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 17 Apr 2025 11:38:23 +0200 Subject: [PATCH 049/129] Fixes --- 62_CAD/CTriangleMesh.h | 12 +--- 62_CAD/DrawResourcesFiller.cpp | 27 +++------ 62_CAD/DrawResourcesFiller.h | 6 +- 62_CAD/main.cpp | 106 +++++++++++++-------------------- 4 files changed, 53 insertions(+), 98 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 0740cf114..16995c28a 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -8,7 +8,6 @@ using namespace nbl; struct DTMHeightShadingInfo { - bool enabled; // Height Shading Mode E_HEIGHT_SHADING_MODE heightShadingMode; @@ -68,7 +67,6 @@ struct DTMHeightShadingInfo struct DTMContourInfo { - bool enabled; LineStyleInfo lineStyleInfo; float startHeight; @@ -76,17 +74,13 @@ struct DTMContourInfo float heightInterval; }; -struct DTMOutlineInfo -{ - bool enabled; - LineStyleInfo lineStyleInfo; -}; - struct DTMSettingsInfo { + uint32_t mode = 0u; + DTMHeightShadingInfo heightShadingInfo; DTMContourInfo contourInfo; - DTMOutlineInfo outlineInfo; + LineStyleInfo outlineStyleInfo; }; class CTriangleMesh final diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 4085b4d30..a255bc700 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -136,14 +136,12 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedS void DrawResourcesFiller::drawTriangleMesh( const CTriangleMesh& mesh, - const DTMHeightShadingInfo& dtmHeightShadingInfo, - const DTMContourInfo& dtmContourInfo, - const DTMOutlineInfo& dtmOutlineInfo, + const DTMSettingsInfo& dtmSettingsInfo, 
SIntendedSubmitInfo& intendedNextSubmit) { flushDrawObjects(); // flushes draw call construction of any possible draw objects before dtm, because currently we're sepaerating dtm draw calls from drawObj draw calls - setActiveDTMSettings(dtmHeightShadingInfo, dtmContourInfo, dtmOutlineInfo); // TODO !!!! + setActiveDTMSettings(dtmSettingsInfo); // TODO !!!! beginMainObject(MainObjectType::DTM); DrawCallData drawCallData = {}; @@ -357,13 +355,8 @@ void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) activeLineStyleIndex = InvalidStyleIdx; } -void DrawResourcesFiller::setActiveDTMSettings(const DTMHeightShadingInfo& heightShadingInfo, const DTMContourInfo& contourInfo, const DTMOutlineInfo& outlineInfo) +void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo) { - DTMSettingsInfo dtmSettingsInfo; - dtmSettingsInfo.heightShadingInfo = heightShadingInfo; - dtmSettingsInfo.contourInfo = contourInfo; - dtmSettingsInfo.outlineInfo = outlineInfo; - activeDTMSettings = dtmSettingsInfo; activeDTMSettingsIndex = InvalidDTMSettingsIdx; } @@ -645,11 +638,9 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm ////dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; - dtmSettings.mode = 0u; - if (dtmSettingsInfo.heightShadingInfo.enabled) + dtmSettings.mode = dtmSettingsInfo.mode; + if (dtmSettings.mode & E_DTM_MODE::HEIGHT_SHADING) { - dtmSettings.mode |= E_DTM_MODE::HEIGHT_SHADING; - switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode) { case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: @@ -666,18 +657,16 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm dtmSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); _NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); } - if (dtmSettingsInfo.contourInfo.enabled) + 
if (dtmSettings.mode & E_DTM_MODE::CONTOUR) { - dtmSettings.mode |= E_DTM_MODE::CONTOUR; dtmSettings.contourLinesStartHeight = dtmSettingsInfo.contourInfo.startHeight; dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourInfo.endHeight; dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourInfo.heightInterval; dtmSettings.contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourInfo.lineStyleInfo); } - if (dtmSettingsInfo.outlineInfo.enabled) + if (dtmSettings.mode & E_DTM_MODE::OUTLINE) { - dtmSettings.mode |= E_DTM_MODE::OUTLINE; - dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineInfo.lineStyleInfo); + dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineStyleInfo); } for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index f0618fd27..196ba6885 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -157,9 +157,7 @@ struct DrawResourcesFiller void drawTriangleMesh( const CTriangleMesh& mesh, - const DTMHeightShadingInfo& dtmHeightShadingInfo, - const DTMContourInfo& dtmContourInfo, - const DTMOutlineInfo& dtmOutlineInfo, + const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); // ! 
Convinience function for Hatch with MSDF Pattern and a solid background @@ -228,7 +226,7 @@ struct DrawResourcesFiller // Setting Active Resources: void setActiveLineStyle(const LineStyleInfo& lineStyle); - void setActiveDTMSettings(const DTMHeightShadingInfo& heightShadingInfo, const DTMContourInfo& contourInfo, const DTMOutlineInfo& outlineInfo); + void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo); void beginMainObject(MainObjectType type); void endMainObject(); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index da7ceb275..6d3a2b431 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3245,107 +3245,81 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); - // TODO: remove - //DTMSettingsInfo dtmSettingsInfo; - // - ////dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; - //dtmSettingsInfo.mode = E_DTM_MODE::CONTOUR; - //dtmSettingsInfo.contourLinesStartHeight = 20; - //dtmSettingsInfo.contourLinesEndHeight = 90; - //dtmSettingsInfo.contourLinesHeightInterval = 10; - - //LineStyleInfo outlineStyle = {}; - //dtmSettingsInfo.outlineLineStyleInfo.screenSpaceLineWidth = 3.0f; - //dtmSettingsInfo.outlineLineStyleInfo.worldSpaceLineWidth = 0.0f; - //dtmSettingsInfo.outlineLineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); - //std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; - //dtmSettingsInfo.outlineLineStyleInfo.setStipplePatternData(outlineStipplePattern); - - //LineStyleInfo contourStyle = {}; - //dtmSettingsInfo.contourLineStyleInfo.screenSpaceLineWidth = 0.0f; - //dtmSettingsInfo.contourLineStyleInfo.worldSpaceLineWidth = 1.0f; - //dtmSettingsInfo.contourLineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); - //std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; - //dtmSettingsInfo.contourLineStyleInfo.setStipplePatternData(contourStipplePattern); - - 
DTMOutlineInfo outlineInfo; - outlineInfo.enabled = true; - outlineInfo.lineStyleInfo.screenSpaceLineWidth = 3.0f; - outlineInfo.lineStyleInfo.worldSpaceLineWidth = 0.0f; - outlineInfo.lineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + DTMSettingsInfo dtmInfo; + dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 3.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 0.0f; + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; - outlineInfo.lineStyleInfo.setStipplePatternData(outlineStipplePattern); - - DTMContourInfo contourInfo; - contourInfo.enabled = true; - contourInfo.startHeight = 20; - contourInfo.endHeight = 90; - contourInfo.heightInterval = 10; - contourInfo.lineStyleInfo.screenSpaceLineWidth = 0.0f; - contourInfo.lineStyleInfo.worldSpaceLineWidth = 1.0f; - contourInfo.lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + + dtmInfo.contourInfo.startHeight = 20; + dtmInfo.contourInfo.endHeight = 90; + dtmInfo.contourInfo.heightInterval = 10; + dtmInfo.contourInfo.lineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.contourInfo.lineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.contourInfo.lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; - contourInfo.lineStyleInfo.setStipplePatternData(contourStipplePattern); + dtmInfo.contourInfo.lineStyleInfo.setStipplePatternData(contourStipplePattern); // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS // 2 - DISCRETE_FIXED_LENGTH_INTERVALS // 3 - CONTINOUS_INTERVALS float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; - DTMHeightShadingInfo heightShadingInfo; - heightShadingInfo.enabled = true; switch (m_shadingModeExample) { case 
E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: { - heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); - heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: { - heightShadingInfo.intervalLength = 10.0f; - heightShadingInfo.intervalIndexToHeightMultiplier = heightShadingInfo.intervalLength; - heightShadingInfo.isCenteredShading = false; - heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(100.0f, 
float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { - heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 
animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } } - drawResourcesFiller.drawTriangleMesh(mesh, heightShadingInfo, contourInfo, outlineInfo, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); - contourInfo.lineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); - outlineInfo.lineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); + dtmInfo.contourInfo.lineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); for (auto& v : mesh.m_vertices) { v.pos += float64_t2(450.0, 200.0); v.height -= 10.0; } - drawResourcesFiller.drawTriangleMesh(mesh, heightShadingInfo, contourInfo, outlineInfo, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); From c8653573a6b93a3962de2f0b5662cb630e9ee51b Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 19 Apr 2025 16:38:55 +0200 Subject: [PATCH 050/129] Saving work --- 62_CAD/DrawResourcesFiller.cpp | 4 +- 62_CAD/main.cpp | 10 ++-- 62_CAD/shaders/globals.hlsl | 46 +++++++++++++++++-- .../main_pipeline/fragment_shader.hlsl | 16 +++---- .../shaders/main_pipeline/vertex_shader.hlsl | 29 ++++++------ 5 files changed, 69 insertions(+), 36 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index a255bc700..c6d898a7c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -141,7 +141,7 @@ void DrawResourcesFiller::drawTriangleMesh( { flushDrawObjects(); // flushes draw call construction of any possible draw objects before dtm, because currently we're sepaerating dtm draw calls from drawObj draw calls - setActiveDTMSettings(dtmSettingsInfo); // TODO !!!! 
+ setActiveDTMSettings(dtmSettingsInfo); beginMainObject(MainObjectType::DTM); DrawCallData drawCallData = {}; @@ -675,7 +675,7 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm if (itr == dtmSettings) return i; } - + return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers } diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 6d3a2b431..f988458b9 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3246,10 +3246,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmInfo; - dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; - - dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 3.0f; - dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 0.0f; + //dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 3.0f; dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); @@ -3274,7 +3274,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; - dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); 
dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index a83acb094..2c86b9732 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -400,9 +400,9 @@ struct DTMSettings return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; } - bool drawOutlineEnabled() { return (mode & E_DTM_MODE::OUTLINE) != 0u; } - bool drawContourEnabled() { return (mode & E_DTM_MODE::CONTOUR) != 0u; } - bool drawHeightShadingEnabled() { return (mode & E_DTM_MODE::HEIGHT_SHADING) != 0u; } + bool drawOutlineEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::OUTLINE) != 0u; } + bool drawContourEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::CONTOUR) != 0u; } + bool drawHeightShadingEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::HEIGHT_SHADING) != 0u; } }; #ifndef __HLSL_VERSION @@ -430,8 +430,44 @@ inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) { - return lhs.outlineLineStyleIdx == rhs.outlineLineStyleIdx && - lhs.contourLineStyleIdx == rhs.contourLineStyleIdx; + if (lhs.mode != rhs.mode) + return false; + + bool equal = true; + if (lhs.drawOutlineEnabled()) + { + equal = lhs.outlineLineStyleIdx == rhs.outlineLineStyleIdx; + } + + if (!equal) + return false; + + if (lhs.drawContourEnabled()) + { + float contourLinesStartHeight; + float contourLinesEndHeight; + float contourLinesHeightInterval; + + equal = lhs.contourLinesStartHeight == rhs.contourLinesStartHeight && + lhs.contourLinesStartHeight == rhs.contourLinesStartHeight && + lhs.contourLinesStartHeight == rhs.contourLinesStartHeight; + } + + if (!equal) + return false; + + if (lhs.drawHeightShadingEnabled()) + { + equal = lhs.intervalLength == rhs.intervalLength && + lhs.intervalIndexToHeightMultiplier == rhs.intervalIndexToHeightMultiplier && + lhs.isCenteredShading == 
rhs.isCenteredShading && + lhs.heightColorEntryCount == rhs.heightColorEntryCount; + + equal == equal && (memcmp(lhs.heightColorMapHeights, rhs.heightColorMapHeights, lhs.heightColorEntryCount * sizeof(float))); + equal == equal && (memcmp(lhs.heightColorMapColors, rhs.heightColorMapColors, lhs.heightColorEntryCount * sizeof(float32_t4))); + } + + return equal; } #endif diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index f9cd52ec3..2b44f24b6 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -508,7 +508,7 @@ float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3 float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) { - float4 outputColor = float4(0.0f, 0.0f, 0.0f, 1.0f); + float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); // HEIGHT SHADING const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; @@ -542,7 +542,6 @@ float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in fl convexPolygonSdf = max(convexPolygonSdf, line3Sdf); convexPolygonSdf = max(convexPolygonSdf, line4Sdf); - // TODO: separate outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); // calculate height color @@ -683,6 +682,7 @@ float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 v[3], in u } } + // TODO: comment next line to fix, figure if it was needed if(contourLinePointsIdx == 2) { nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); @@ -704,9 +704,11 @@ float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 v[3], in u outputColor.a = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a; outputColor.rgb = contourStyle.color.rgb; - } - 
return outputColor; + return outputColor; + } + + return float4(0.0f, 0.0f, 0.0f, 0.0f); } float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float3 baryCoord, in float height) @@ -756,9 +758,6 @@ float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in u { for (int i = 0; i < 3; ++i) { - if (distances[i] > outlineThickness) - continue; - const uint2 currentEdgePoints = edgePoints[i]; float3 p0 = v[currentEdgePoints[0]]; float3 p1 = v[currentEdgePoints[1]]; @@ -774,9 +773,6 @@ float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in u { for (int i = 0; i < 3; ++i) { - if (distances[i] > outlineThickness) - continue; - const uint2 currentEdgePoints = edgePoints[i]; float3 p0 = v[currentEdgePoints[0]]; float3 p1 = v[currentEdgePoints[1]]; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index f726104b5..5d9189d34 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -78,12 +78,6 @@ void dilateHatch(out float2 outOffsetVec, out float2 outUV, const float2 PSInput main(uint vertexID : SV_VertexID) { - // TODO[Przemek]: Disable Everything here and do your own thing as we already discussed, but let's have the same PSInput data passed to fragment. - // your programmable pulling will use the baseVertexBufferAddress BDA address and `vertexID` to RawBufferLoad it's vertex. - // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ - // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. 
- - ClipProjectionData clipProjectionData; PSInput outV; @@ -147,15 +141,22 @@ PSInput main(uint vertexID : SV_VertexID) ); DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); - LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); - LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); + // TODO: maybe move to fragment shader since we may have multiple contour styles later - const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; - const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float sdfContourLineThickness = screenSpaceContourLineWidth * 0.5f; - outV.setOutlineThickness(sdfOutlineThickness); - outV.setContourLineThickness(sdfContourLineThickness); + if (dtm.drawOutlineEnabled()) + { + LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); + const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; + outV.setOutlineThickness(sdfOutlineThickness); + } + if (dtm.drawContourEnabled()) + { + LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); + const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float sdfContourLineThickness = screenSpaceContourLineWidth * 0.5f; + outV.setContourLineThickness(sdfContourLineThickness); + } // full screen triangle (this will destroy outline, contour line and height drawing) #if 0 From 95ed1adb218dfc3159ace39ff69a852e81913cda Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 21 Apr 2025 
14:41:23 +0330 Subject: [PATCH 051/129] apply inverse gamma on colors --- 62_CAD/shaders/main_pipeline/common.hlsl | 9 +++++++++ 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 7 ++++++- 62_CAD/shaders/main_pipeline/resolve_alphas.hlsl | 5 +++++ 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 5 +++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 4327cf7fe..3c12a3dcf 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -3,6 +3,15 @@ #include "../globals.hlsl" +// This function soley exists to match n4ce's behaviour, colors and color operations for DTMs, Curves, Lines, Hatches are done in linear space and then outputted to linear surface (as if surface had UNORM format, but ours is SRGB) +// We should do gamma "uncorrection" to account for the fact that our surface format is SRGB and will do gamma correction +void gammaUncorrect(inout float3 col) +{ + bool outputToSRGB = true; // TODO + float gamma = (outputToSRGB) ? 
2.2f : 1.0f; + col.rgb = pow(col.rgb, gamma); +} + // TODO: Use these in C++ as well once numeric_limits compiles on C++ float32_t2 unpackCurveBoxUnorm(uint32_t2 value) { diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 2b44f24b6..240bf967f 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -409,7 +409,11 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp // draw with previous geometry's style's color or stored in texture buffer :kek: // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style + { color = loadLineStyle(toResolveStyleIdx).color; + gammaUncorrect(color.rgb); // want to output to SRGB without gamma correction + } + color.a *= float(storedQuantizedAlpha) / 255.f; return color; @@ -852,7 +856,8 @@ float4 fragMain(PSInput input) : SV_TARGET textureColor = dtmColor.rgb; localAlpha = dtmColor.a; - + + gammaUncorrect(textureColor); // want to output to SRGB without gamma correction return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); } else diff --git a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl index 987dd7c29..86257428f 100644 --- a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl +++ b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl @@ -60,10 +60,15 @@ float32_t4 calculateFinalColor(const uint2 fragCoord) if (!resolve) discard; + // draw with previous geometry's style's color or stored in texture buffer :kek: // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj if (toResolveStyleIdx != 
InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style + { color = loadLineStyle(toResolveStyleIdx).color; + gammaUncorrect(color.rgb); // want to output to SRGB without gamma correction + } + color.a *= float(storedQuantizedAlpha) / 255.f; return color; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 5d9189d34..4c1c74a93 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -26,7 +26,12 @@ float2 QuadraticBezier(float2 p0, float2 p1, float2 p2, float t) ClipProjectionData getClipProjectionData(in MainObject mainObj) { if (mainObj.clipProjectionIndex != InvalidClipProjectionIndex) + { +#ifdef NBL_2D_SHOWCASE_MODE + return nbl::hlsl::mul(globals.defaultClipProjection.projectionToNDC, loadCustomClipProjection(mainObj.clipProjectionIndex)); +#endif return loadCustomClipProjection(mainObj.clipProjectionIndex); + } else return globals.defaultClipProjection; } From 244b0d0a25fefbd9e1274007a59f390c76ddfbeb Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 21 Apr 2025 14:57:46 +0330 Subject: [PATCH 052/129] screenToWorldRatio doesn't need to be double, only used to transform worldspace lineWidth --- 62_CAD/main.cpp | 4 ++-- 62_CAD/shaders/globals.hlsl | 6 +++--- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index f988458b9..6b4217202 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1216,9 +1216,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu globalData.defaultClipProjection.projectionToNDC = projectionToNDC; globalData.defaultClipProjection.minClipNDC = float32_t2(-1.0, -1.0); globalData.defaultClipProjection.maxClipNDC = float32_t2(+1.0, +1.0); - auto screenToWorld = 
getScreenToWorldRatio(globalData.defaultClipProjection.projectionToNDC, globalData.resolution); + float screenToWorld = getScreenToWorldRatio(globalData.defaultClipProjection.projectionToNDC, globalData.resolution); globalData.screenToWorldRatio = screenToWorld; - globalData.worldToScreenRatio = (1.0/screenToWorld); + globalData.worldToScreenRatio = (1.0f/screenToWorld); globalData.miterLimit = 10.0f; globalData.currentlyActiveMainObjectIndex = drawResourcesFiller.getActiveMainObjectIndex(); SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 2c86b9732..09e809a59 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -71,8 +71,8 @@ struct Globals { Pointers pointers; ClipProjectionData defaultClipProjection; - pfloat64_t screenToWorldRatio; - pfloat64_t worldToScreenRatio; + float screenToWorldRatio; + float worldToScreenRatio; uint32_t2 resolution; float antiAliasingFactor; uint32_t miterLimit; @@ -80,7 +80,7 @@ struct Globals float32_t _padding; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Globals) == 176u); +static_assert(sizeof(Globals) == 168u); #endif #ifdef __HLSL_VERSION diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 4c1c74a93..479d05888 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -151,14 +151,14 @@ PSInput main(uint vertexID : SV_VertexID) if (dtm.drawOutlineEnabled()) { LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); - const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + _static_cast(_static_cast(outlineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float screenSpaceOutlineWidth = outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio; const float 
sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; outV.setOutlineThickness(sdfOutlineThickness); } if (dtm.drawContourEnabled()) { LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); - const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + _static_cast(_static_cast(contourStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + contourStyle.worldSpaceLineWidth * globals.screenToWorldRatio; const float sdfContourLineThickness = screenSpaceContourLineWidth * 0.5f; outV.setContourLineThickness(sdfContourLineThickness); } @@ -196,7 +196,7 @@ PSInput main(uint vertexID : SV_VertexID) LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); // Width is on both sides, thickness is one one side of the curve (div by 2.0f) - const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + _static_cast(_static_cast(lineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); + const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + lineStyle.worldSpaceLineWidth * globals.screenToWorldRatio; const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; const float sdfLineThickness = screenSpaceLineWidth / 2.0f; outV.setLineThickness(sdfLineThickness); From b36e702ed6f9e57e966e450b5d3bbb7e8681165d Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 21 Apr 2025 16:07:22 +0330 Subject: [PATCH 053/129] cleanups and setup work for multiple contours --- 62_CAD/CTriangleMesh.h | 22 ++-- 62_CAD/DrawResourcesFiller.cpp | 25 ++-- 62_CAD/DrawResourcesFiller.h | 2 +- 62_CAD/main.cpp | 21 ++-- 62_CAD/shaders/globals.hlsl | 76 +++++++----- 62_CAD/shaders/main_pipeline/common.hlsl | 11 +- .../main_pipeline/fragment_shader.hlsl | 110 +++++++++--------- .../shaders/main_pipeline/vertex_shader.hlsl | 29 +---- 8 files changed, 142 insertions(+), 154 deletions(-) diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h 
index 16995c28a..67daf5221 100644 --- a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -6,7 +6,7 @@ using namespace nbl; -struct DTMHeightShadingInfo +struct DTMHeightShadingSettingsInfo { // Height Shading Mode E_HEIGHT_SHADING_MODE heightShadingMode; @@ -35,15 +35,15 @@ struct DTMHeightShadingInfo bool fillShaderDTMSettingsHeightColorMap(DTMSettings& dtmSettings) const { const uint32_t mapSize = heightColorSet.size(); - if (mapSize > DTMSettings::HeightColorMapMaxEntries) + if (mapSize > DTMHeightShadingSettings::HeightColorMapMaxEntries) return false; - dtmSettings.heightColorEntryCount = mapSize; + dtmSettings.heightShadingSettings.heightColorEntryCount = mapSize; int index = 0; for (auto it = heightColorSet.begin(); it != heightColorSet.end(); ++it) { - dtmSettings.heightColorMapHeights[index] = it->height; - dtmSettings.heightColorMapColors[index] = it->color; + dtmSettings.heightShadingSettings.heightColorMapHeights[index] = it->height; + dtmSettings.heightShadingSettings.heightColorMapColors[index] = it->color; ++index; } @@ -65,7 +65,7 @@ struct DTMHeightShadingInfo std::set heightColorSet; }; -struct DTMContourInfo +struct DTMContourSettingsInfo { LineStyleInfo lineStyleInfo; @@ -76,11 +76,17 @@ struct DTMContourInfo struct DTMSettingsInfo { + static constexpr uint32_t MaxContourSettings = DTMSettings::MaxContourSettings; + uint32_t mode = 0u; - DTMHeightShadingInfo heightShadingInfo; - DTMContourInfo contourInfo; + // outline LineStyleInfo outlineStyleInfo; + // contours + uint32_t contourSettingsCount = 0u; + DTMContourSettingsInfo contourSettings[MaxContourSettings]; + // height shading + DTMHeightShadingSettingsInfo heightShadingInfo; }; class CTriangleMesh final diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c6d898a7c..30fb6d748 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -627,7 +627,8 @@ uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineSty 
uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { const size_t remainingResourcesSize = calculateRemainingResourcesSize(); - const size_t maxMemRequired = sizeof(DTMSettings) + 2 * sizeof(LineStyle); + const size_t noOfLineStylesRequired = ((dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) ? 1u : 0u) + dtmSettingsInfo.contourSettingsCount; + const size_t maxMemRequired = sizeof(DTMSettings) + noOfLineStylesRequired * sizeof(LineStyle); const bool enoughMem = remainingResourcesSize >= maxMemRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? if (!enoughMem) @@ -644,25 +645,29 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode) { case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: - dtmSettings.intervalLength = std::numeric_limits::infinity(); + dtmSettings.heightShadingSettings.intervalLength = std::numeric_limits::infinity(); break; case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: - dtmSettings.intervalLength = dtmSettingsInfo.heightShadingInfo.intervalLength; + dtmSettings.heightShadingSettings.intervalLength = dtmSettingsInfo.heightShadingInfo.intervalLength; break; case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: - dtmSettings.intervalLength = 0.0f; + dtmSettings.heightShadingSettings.intervalLength = 0.0f; break; } - dtmSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier; - dtmSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); + dtmSettings.heightShadingSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier; + dtmSettings.heightShadingSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); 
_NBL_DEBUG_BREAK_IF(!dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)); } if (dtmSettings.mode & E_DTM_MODE::CONTOUR) { - dtmSettings.contourLinesStartHeight = dtmSettingsInfo.contourInfo.startHeight; - dtmSettings.contourLinesEndHeight = dtmSettingsInfo.contourInfo.endHeight; - dtmSettings.contourLinesHeightInterval = dtmSettingsInfo.contourInfo.heightInterval; - dtmSettings.contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourInfo.lineStyleInfo); + dtmSettings.contourSettingsCount = dtmSettingsInfo.contourSettingsCount; + for (uint32_t i = 0u; i < dtmSettings.contourSettingsCount; ++i) + { + dtmSettings.contourSettings[i].contourLinesStartHeight = dtmSettingsInfo.contourSettings[i].startHeight; + dtmSettings.contourSettings[i].contourLinesEndHeight = dtmSettingsInfo.contourSettings[i].endHeight; + dtmSettings.contourSettings[i].contourLinesHeightInterval = dtmSettingsInfo.contourSettings[i].heightInterval; + dtmSettings.contourSettings[i].contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourSettings[i].lineStyleInfo); + } } if (dtmSettings.mode & E_DTM_MODE::OUTLINE) { diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 196ba6885..1e244ae01 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -129,7 +129,7 @@ struct DrawResourcesFiller { // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) - return core::alignUp(sizeof(LineStyle) * 2u + sizeof(DTMSettings) + sizeof(ClipProjectionData) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, ResourcesMaxNaturalAlignment); + return core::alignUp(sizeof(LineStyle) + sizeof(LineStyle) * DTMSettings::MaxContourSettings + sizeof(DTMSettings) + 
sizeof(ClipProjectionData) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, ResourcesMaxNaturalAlignment); } void allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 6b4217202..0f1653591 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3246,22 +3246,23 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmInfo; - //dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; - dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + //dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING; dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 3.0f; dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); - dtmInfo.contourInfo.startHeight = 20; - dtmInfo.contourInfo.endHeight = 90; - dtmInfo.contourInfo.heightInterval = 10; - dtmInfo.contourInfo.lineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmInfo.contourInfo.lineStyleInfo.worldSpaceLineWidth = 1.0f; - dtmInfo.contourInfo.lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + dtmInfo.contourSettingsCount = 1u; + dtmInfo.contourSettings[0u].startHeight = 20; + dtmInfo.contourSettings[0u].endHeight = 90; + dtmInfo.contourSettings[0u].heightInterval = 10; + dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; - dtmInfo.contourInfo.lineStyleInfo.setStipplePatternData(contourStipplePattern); + 
dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS @@ -3311,7 +3312,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); - dtmInfo.contourInfo.lineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); for (auto& v : mesh.m_vertices) { diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 09e809a59..bd700785d 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -368,20 +368,18 @@ enum class E_HEIGHT_SHADING_MODE : uint32_t CONTINOUS_INTERVALS }; -// Documentation and explanation of variables in DTMSettingsInfo -struct DTMSettings +struct DTMContourSettings { - const static uint32_t HeightColorMapMaxEntries = 16u; - uint32_t outlineLineStyleIdx; // index into line styles uint32_t contourLineStyleIdx; // index into line styles - - uint32_t mode; // E_DTM_MODE - - // contour lines float contourLinesStartHeight; float contourLinesEndHeight; float contourLinesHeightInterval; +}; +struct DTMHeightShadingSettings +{ + const static uint32_t HeightColorMapMaxEntries = 16u; + // height-color map float intervalLength; float intervalIndexToHeightMultiplier; @@ -399,6 +397,24 @@ struct DTMSettings return E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; } +}; + +// Documentation and explanation of variables in DTMSettingsInfo +struct DTMSettings +{ + const static uint32_t MaxContourSettings = 8u; + + uint32_t mode; // E_DTM_MODE + + // outline + uint32_t outlineLineStyleIdx; + + // contour lines + uint32_t contourSettingsCount; + DTMContourSettings contourSettings[MaxContourSettings]; + + // 
height shading + DTMHeightShadingSettings heightShadingSettings; bool drawOutlineEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::OUTLINE) != 0u; } bool drawContourEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::CONTOUR) != 0u; } @@ -433,41 +449,39 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) if (lhs.mode != rhs.mode) return false; - bool equal = true; if (lhs.drawOutlineEnabled()) { - equal = lhs.outlineLineStyleIdx == rhs.outlineLineStyleIdx; + if (lhs.outlineLineStyleIdx != rhs.outlineLineStyleIdx) + return false; } - if (!equal) - return false; - if (lhs.drawContourEnabled()) { - float contourLinesStartHeight; - float contourLinesEndHeight; - float contourLinesHeightInterval; - - equal = lhs.contourLinesStartHeight == rhs.contourLinesStartHeight && - lhs.contourLinesStartHeight == rhs.contourLinesStartHeight && - lhs.contourLinesStartHeight == rhs.contourLinesStartHeight; + if (lhs.contourSettingsCount != rhs.contourSettingsCount) + return false; + if (!memcmp(lhs.contourSettings, rhs.contourSettings, lhs.contourSettingsCount * sizeof(DTMContourSettings))) + return false; } - if (!equal) - return false; - if (lhs.drawHeightShadingEnabled()) { - equal = lhs.intervalLength == rhs.intervalLength && - lhs.intervalIndexToHeightMultiplier == rhs.intervalIndexToHeightMultiplier && - lhs.isCenteredShading == rhs.isCenteredShading && - lhs.heightColorEntryCount == rhs.heightColorEntryCount; - - equal == equal && (memcmp(lhs.heightColorMapHeights, rhs.heightColorMapHeights, lhs.heightColorEntryCount * sizeof(float))); - equal == equal && (memcmp(lhs.heightColorMapColors, rhs.heightColorMapColors, lhs.heightColorEntryCount * sizeof(float32_t4))); + if (lhs.heightShadingSettings.intervalLength != rhs.heightShadingSettings.intervalLength) + return false; + if (lhs.heightShadingSettings.intervalIndexToHeightMultiplier != rhs.heightShadingSettings.intervalIndexToHeightMultiplier) + return false; + if 
(lhs.heightShadingSettings.isCenteredShading != rhs.heightShadingSettings.isCenteredShading) + return false; + if (lhs.heightShadingSettings.heightColorEntryCount != rhs.heightShadingSettings.heightColorEntryCount) + return false; + + + if(!memcmp(lhs.heightShadingSettings.heightColorMapHeights, rhs.heightShadingSettings.heightColorMapHeights, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float))) + return false; + if(!memcmp(lhs.heightShadingSettings.heightColorMapColors, rhs.heightShadingSettings.heightColorMapColors, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float32_t4))) + return false; } - return equal; + return true; } #endif diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 3c12a3dcf..631e421b9 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -224,16 +224,7 @@ struct PSInput void setImageTextureId(uint32_t textureId) { data2.x = asfloat(textureId); } /* TRIANGLE MESH */ - - float getOutlineThickness() { return asfloat(data1.z); } - float getContourLineThickness() { return asfloat(data1.w); } - - void setOutlineThickness(float lineThickness) { data1.z = asuint(lineThickness); } - void setContourLineThickness(float stretch) { data1.w = asuint(stretch); } - - void setHeight(float height) { interp_data5.x = height; } - float getHeight() { return interp_data5.x; } - + #ifndef FRAGMENT_SHADER_INPUT // vertex shader void setScreenSpaceVertexAttribs(float3 pos) { vertexScreenSpacePos = pos; } #else // fragment shader diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 240bf967f..fb6b6e8e8 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -337,12 +337,12 @@ typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; // for usage in upper_bound function struct DTMSettingsHeightsAccessor { - 
DTMSettings dtmSettings; + DTMHeightShadingSettings settings; using value_type = float; float operator[](const uint32_t ix) { - return dtmSettings.heightColorMapHeights[ix]; + return settings.heightColorMapHeights[ix]; } }; @@ -470,25 +470,25 @@ float getIntervalPosition(in float height, in float minHeight, in float interval return ( (height - minHeight) / intervalLength ); } -void getIntervalHeightAndColor(in int intervalIndex, in DTMSettings dtmSettings, out float4 outIntervalColor, out float outIntervalHeight) +void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight) { - float minShadingHeight = dtmSettings.heightColorMapHeights[0]; - float heightForColor = minShadingHeight + float(intervalIndex) * dtmSettings.intervalIndexToHeightMultiplier; + float minShadingHeight = settings.heightColorMapHeights[0]; + float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier; - if (dtmSettings.isCenteredShading) - outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * dtmSettings.intervalLength; + if (settings.isCenteredShading) + outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * settings.intervalLength; else - outIntervalHeight = minShadingHeight + (float(intervalIndex)) * dtmSettings.intervalLength; + outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength; - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtmSettings }; - uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, dtmSettings.heightColorEntryCount, heightForColor), dtmSettings.heightColorEntryCount-1u); + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount-1u); uint32_t lowerBoundHeightIndex = 
max(upperBoundHeightIndex - 1, 0); - float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; - float4 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; if (upperBoundHeight == lowerBoundHeight) { @@ -510,14 +510,14 @@ float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3 return float3(u, v, w); } -float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) +float4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) { float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); // HEIGHT SHADING - const uint32_t heightMapSize = dtmSettings.heightColorEntryCount; - float minShadingHeight = dtmSettings.heightColorMapHeights[0]; - float maxShadingHeight = dtmSettings.heightColorMapHeights[heightMapSize - 1]; + const uint32_t heightMapSize = settings.heightColorEntryCount; + float minShadingHeight = settings.heightColorMapHeights[0]; + float maxShadingHeight = settings.heightColorMapHeights[heightMapSize - 1]; if (heightMapSize > 0) { @@ -549,11 +549,11 @@ float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in fl outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); // calculate height color - E_HEIGHT_SHADING_MODE mode = dtmSettings.determineHeightShadingMode(); + E_HEIGHT_SHADING_MODE mode = 
settings.determineHeightShadingMode(); if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtmSettings }; + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); int mapIndex = max(upperBoundIndex - 1, 0); int mapIndexPrev = max(mapIndex - 1, 0); @@ -563,13 +563,13 @@ float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in fl // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values bool blendWithPrev = (mapIndex > 0) - && (mapIndex >= heightMapSize - 1 || (height * 2.0 < dtmSettings.heightColorMapHeights[upperBoundIndex] + dtmSettings.heightColorMapHeights[mapIndex])); + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex])); HeightSegmentTransitionData transitionInfo; transitionInfo.currentHeight = height; - transitionInfo.currentSegmentColor = dtmSettings.heightColorMapColors[mapIndex]; - transitionInfo.boundaryHeight = blendWithPrev ? dtmSettings.heightColorMapHeights[mapIndex] : dtmSettings.heightColorMapHeights[mapIndexNext]; - transitionInfo.otherSegmentColor = blendWithPrev ? dtmSettings.heightColorMapColors[mapIndexPrev] : dtmSettings.heightColorMapColors[mapIndexNext]; + transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex]; + transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext]; + transitionInfo.otherSegmentColor = blendWithPrev ? 
settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext]; float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); outputColor.rgb = localHeightColor.rgb; @@ -577,13 +577,13 @@ float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in fl } else if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) { - float intervalPosition = getIntervalPosition(height, minShadingHeight, dtmSettings.intervalLength, dtmSettings.isCenteredShading); + float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading); float positionWithinInterval = frac(intervalPosition); int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); float4 currentIntervalColor; float currentIntervalHeight; - getIntervalHeightAndColor(intervalIndex, dtmSettings, currentIntervalColor, currentIntervalHeight); + getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight); bool blendWithPrev = (positionWithinInterval < 0.5f); @@ -594,13 +594,13 @@ float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in fl { int prevIntervalIdx = max(intervalIndex - 1, 0); float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev - getIntervalHeightAndColor(prevIntervalIdx, dtmSettings, transitionInfo.otherSegmentColor, prevIntervalHeight); + getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight); transitionInfo.boundaryHeight = currentIntervalHeight; } else { int nextIntervalIdx = intervalIndex + 1; - getIntervalHeightAndColor(nextIntervalIdx, dtmSettings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); + getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); } float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, 
heightDeriv); @@ -609,15 +609,15 @@ float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in fl } else if (mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { dtmSettings }; + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1; - float upperBoundHeight = dtmSettings.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = dtmSettings.heightColorMapHeights[lowerBoundHeightIndex]; + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; - float4 upperBoundColor = dtmSettings.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = dtmSettings.heightColorMapColors[lowerBoundHeightIndex]; + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; float interpolationVal; if (upperBoundHeightIndex == 0) @@ -635,20 +635,19 @@ float4 calculateDTMHeightColor(in DTMSettings dtmSettings, in float3 v[3], in fl return outputColor; } -float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float height) +float4 calculateDTMContourColor(in DTMContourSettings contourSettings, in float3 v[3], in uint2 edgePoints[3], in float2 fragPos, in float height) { float4 outputColor; - LineStyle contourStyle = loadLineStyle(dtmSettings.contourLineStyleIdx); - const float contourThickness = psInput.getContourLineThickness(); + LineStyle contourStyle = loadLineStyle(contourSettings.contourLineStyleIdx); + const float contourThickness = (contourStyle.screenSpaceLineWidth + 
contourStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; float stretch = 1.0f; float phaseShift = 0.0f; - const float worldToScreenRatio = psInput.getCurrentWorldToScreenRatio(); // TODO: move to ubo or push constants - const float startHeight = dtmSettings.contourLinesStartHeight; - const float endHeight = dtmSettings.contourLinesEndHeight; - const float interval = dtmSettings.contourLinesHeightInterval; + const float startHeight = contourSettings.contourLinesStartHeight; + const float endHeight = contourSettings.contourLinesEndHeight; + const float interval = contourSettings.contourLinesHeightInterval; // TODO: can be precomputed const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; @@ -694,7 +693,7 @@ float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 v[3], in u float distance = nbl::hlsl::numeric_limits::max; if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, psInput.position.xy, contourThickness, contourStyle.isRoadStyleFlag); + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag); } else { @@ -702,8 +701,8 @@ float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 v[3], in u // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! 
// where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, psInput.position.xy, contourThickness, contourStyle.isRoadStyleFlag, clipper); + LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag, clipper); } outputColor.a = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a; @@ -715,14 +714,13 @@ float4 calculateDTMContourColor(in DTMSettings dtmSettings, in float3 v[3], in u return float4(0.0f, 0.0f, 0.0f, 0.0f); } -float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in uint2 edgePoints[3], in PSInput psInput, in float3 baryCoord, in float height) +float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in uint2 edgePoints[3], in float2 fragPos, in float3 baryCoord, in float height) { float4 outputColor; - LineStyle outlineStyle = loadLineStyle(dtmSettings.outlineLineStyleIdx); - const float outlineThickness = psInput.getOutlineThickness(); + LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); + const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float worldToScreenRatio = psInput.getCurrentWorldToScreenRatio(); const float stretch = 1.0f; // index of vertex opposing an edge, needed for calculation of triangle heights @@ 
-768,7 +766,7 @@ float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in u float distance = nbl::hlsl::numeric_limits::max; nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); - distance = ClippedSignedDistance >::sdf(lineSegment, psInput.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag); + distance = ClippedSignedDistance >::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); minDistance = min(minDistance, distance); } @@ -793,8 +791,8 @@ float4 calculateDTMOutlineColor(in DTMSettings dtmSettings, in float3 v[3], in u float distance = nbl::hlsl::numeric_limits::max; nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, psInput.position.xy, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); minDistance = min(minDistance, distance); } @@ -848,11 +846,11 @@ float4 fragMain(PSInput input) : SV_TARGET float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); if (dtmSettings.drawHeightShadingEnabled()) - dtmColor = blendColorOnTop(dtmColor, calculateDTMHeightColor(dtmSettings, v, heightDeriv, input.position.xy, height)); + dtmColor = blendColorOnTop(dtmColor, calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); if (dtmSettings.drawContourEnabled()) - dtmColor = blendColorOnTop(dtmColor, calculateDTMContourColor(dtmSettings, v, edgePoints, input, 
height)); + dtmColor = blendColorOnTop(dtmColor, calculateDTMContourColor(dtmSettings.contourSettings[0u], v, edgePoints, input.position.xy, height)); if (dtmSettings.drawOutlineEnabled()) - dtmColor = blendColorOnTop(dtmColor, calculateDTMOutlineColor(dtmSettings, v, edgePoints, input, baryCoord, height)); + dtmColor = blendColorOnTop(dtmColor, calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, edgePoints, input.position.xy, baryCoord, height)); textureColor = dtmColor.rgb; localAlpha = dtmColor.a; @@ -874,7 +872,6 @@ float4 fragMain(PSInput input) : SV_TARGET const float thickness = input.getLineThickness(); const float phaseShift = input.getCurrentPhaseShift(); const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); @@ -887,7 +884,7 @@ float4 fragMain(PSInput input) : SV_TARGET else { nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); } } @@ -900,7 +897,6 @@ float4 fragMain(PSInput input) : SV_TARGET const float thickness = input.getLineThickness(); const float phaseShift = input.getCurrentPhaseShift(); const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); LineStyle style = loadLineStyle(styleIdx); if (!style.hasStipples() || stretch == InvalidStyleStretchValue) @@ -909,7 +905,7 @@ float4 fragMain(PSInput input) : SV_TARGET } else 
{ - BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio ); distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); } } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 479d05888..7ce0f43e7 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -128,6 +128,8 @@ PSInput main(uint vertexID : SV_VertexID) triangleVertices[2].pos = triangleVertices[2].pos - triangleCentroid; // TODO: calculate dialation factor + // const float dilateByPixels = 0.5 * (dtmSettings.maxScreenSpaceLineWidth + dtmSettings.maxWorldSpaceLineWidth * globals.screenToWorldRatio) + aaFactor; + pfloat64_t dialationFactor = _static_cast(2.0f); pfloat64_t2 dialatedVertex = triangleVertices[currentVertexWithinTriangleIndex].pos * dialationFactor; @@ -138,30 +140,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.position = transformFromSreenSpaceToNdc(transformedDilatedPos, globals.resolution); const float heightAsFloat = nbl::hlsl::_static_cast(vtx.height); - outV.setHeight(heightAsFloat); outV.setScreenSpaceVertexAttribs(float3(transformedOriginalPos, heightAsFloat)); - outV.setCurrentWorldToScreenRatio( - _static_cast((_static_cast(2.0f) / - (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) - ); - - DTMSettings dtm = loadDTMSettings(mainObj.dtmSettingsIdx); - - // TODO: maybe move to fragment shader since we may have multiple contour styles later - if (dtm.drawOutlineEnabled()) - { - LineStyle outlineStyle = loadLineStyle(dtm.outlineLineStyleIdx); - const float screenSpaceOutlineWidth = 
outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio; - const float sdfOutlineThickness = screenSpaceOutlineWidth * 0.5f; - outV.setOutlineThickness(sdfOutlineThickness); - } - if (dtm.drawContourEnabled()) - { - LineStyle contourStyle = loadLineStyle(dtm.contourLineStyleIdx); - const float screenSpaceContourLineWidth = contourStyle.screenSpaceLineWidth + contourStyle.worldSpaceLineWidth * globals.screenToWorldRatio; - const float sdfContourLineThickness = screenSpaceContourLineWidth * 0.5f; - outV.setContourLineThickness(sdfContourLineThickness); - } // full screen triangle (this will destroy outline, contour line and height drawing) #if 0 @@ -200,10 +179,6 @@ PSInput main(uint vertexID : SV_VertexID) const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; const float sdfLineThickness = screenSpaceLineWidth / 2.0f; outV.setLineThickness(sdfLineThickness); - outV.setCurrentWorldToScreenRatio( - _static_cast((_static_cast(2.0f) / - (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) - ); if (objType == ObjectType::LINE) { From 2462d0adfa9a49e30dacbb0d25c3bd1acc47a62f Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 22 Apr 2025 08:58:22 +0330 Subject: [PATCH 054/129] multiple contours in fragment shader --- 62_CAD/main.cpp | 11 +++++++++-- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 5 ++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 0f1653591..5f48af58a 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3246,15 +3246,17 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmInfo; - dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + //dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; //dtmInfo.mode = 
E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode = E_DTM_MODE::CONTOUR; + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 3.0f; dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); - dtmInfo.contourSettingsCount = 1u; + dtmInfo.contourSettingsCount = 2u; dtmInfo.contourSettings[0u].startHeight = 20; dtmInfo.contourSettings[0u].endHeight = 90; dtmInfo.contourSettings[0u].heightInterval = 10; @@ -3264,6 +3266,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); + dtmInfo.contourSettings[1u] = dtmInfo.contourSettings[0u]; + dtmInfo.contourSettings[1u].startHeight += 5.0f; + dtmInfo.contourSettings[1u].heightInterval = 13.0f; + dtmInfo.contourSettings[1u].lineStyleInfo.color = float32_t4(0.8f, 0.4f, 0.3f, 1.0f); + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS // 2 - DISCRETE_FIXED_LENGTH_INTERVALS diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index fb6b6e8e8..31c25a6e5 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -848,7 +848,10 @@ float4 fragMain(PSInput input) : SV_TARGET if (dtmSettings.drawHeightShadingEnabled()) dtmColor = blendColorOnTop(dtmColor, calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); if (dtmSettings.drawContourEnabled()) - dtmColor = blendColorOnTop(dtmColor, calculateDTMContourColor(dtmSettings.contourSettings[0u], v, edgePoints, input.position.xy, height)); + { + for(uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // 
TODO: should reverse the order with blendUnder + dtmColor = blendColorOnTop(dtmColor, calculateDTMContourColor(dtmSettings.contourSettings[i], v, edgePoints, input.position.xy, height)); + } if (dtmSettings.drawOutlineEnabled()) dtmColor = blendColorOnTop(dtmColor, calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, edgePoints, input.position.xy, baryCoord, height)); From 287688e18f75aa1a24c054691e5e0e197c9f751f Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 22 Apr 2025 11:05:58 +0200 Subject: [PATCH 055/129] Small example 62 update --- 62_CAD/main.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 5f48af58a..3a32b8fa8 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3245,10 +3245,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); - DTMSettingsInfo dtmInfo; - //dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; - //dtmInfo.mode = E_DTM_MODE::HEIGHT_SHADING; - dtmInfo.mode = E_DTM_MODE::CONTOUR; + DTMSettingsInfo dtmInfo{}; + dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode |= E_DTM_MODE::CONTOUR; dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 3.0f; From daa51f68fc040b8927a9c5d1e0bd8e6dc4e3cac1 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 22 Apr 2025 21:05:25 +0200 Subject: [PATCH 056/129] Refactor --- 62_CAD/CTriangleMesh.h | 2 +- 62_CAD/main.cpp | 10 +- 62_CAD/shaders/globals.hlsl | 2 +- 62_CAD/shaders/main_pipeline/dtm.hlsl | 678 ++++++++++++++++ .../main_pipeline/fragment_shader.hlsl | 723 +----------------- 5 files changed, 695 insertions(+), 720 deletions(-) create mode 100644 62_CAD/shaders/main_pipeline/dtm.hlsl diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h index 67daf5221..78f7dd99f 100644 --- 
a/62_CAD/CTriangleMesh.h +++ b/62_CAD/CTriangleMesh.h @@ -78,7 +78,7 @@ struct DTMSettingsInfo { static constexpr uint32_t MaxContourSettings = DTMSettings::MaxContourSettings; - uint32_t mode = 0u; + uint32_t mode = 0u; // related to E_DTM_MODE // outline LineStyleInfo outlineStyleInfo; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 3a32b8fa8..9f5392d4b 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1444,8 +1444,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu void addObjects(SIntendedSubmitInfo& intendedNextSubmit) { - // TODO[Przemek]: add your own case, you won't call any other drawResourcesFiller function, only drawMesh with your custom made Mesh (for start it can be a single triangle) - // we record upload of our objects and if we failed to allocate we submit everything if (!intendedNextSubmit.valid()) { @@ -3232,9 +3230,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // SINGLE TRIANGLE /*core::vector vertices = { - { float32_t2(0.0, 0.0), -20.0 }, - { float32_t2(200.0, 200.0), 100.0 }, - { float32_t2(200.0, -200.0), 80.0 } + { float64_t2(0.0, 0.0), -20.0 }, + { float64_t2(-200.0, -200.0), 100.0 }, + { float64_t2(200.0, -100.0), 80.0 }, }; core::vector indices = { @@ -3251,7 +3249,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.mode |= E_DTM_MODE::CONTOUR; dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 3.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 1.0f; dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index bd700785d..045e11f1e 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -300,7 +300,7 @@ NBL_CONSTEXPR 
float InvalidStyleStretchValue = nbl::hlsl::numeric_limits: struct TriangleMeshVertex { pfloat64_t2 pos; - pfloat64_t height; + pfloat64_t height; // TODO: can be of type float32_t instead }; // The color parameter is also used for styling non-curve objects such as text glyphs and hatches with solid color diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl new file mode 100644 index 000000000..5b41eabb3 --- /dev/null +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -0,0 +1,678 @@ +#ifndef _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ + +#include +#include + +// TODO: functions outside of the "dtm" namespace need to be moved to another file + +// for usage in upper_bound function +struct StyleAccessor +{ + LineStyle style; + using value_type = float; + + float operator[](const uint32_t ix) + { + return style.getStippleValue(ix); + } +}; + +template +struct StyleClipper +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; + + static StyleClipper construct( + LineStyle style, + CurveType curve, + typename CurveType::ArcLengthCalculator arcLenCalc, + float phaseShift, + float stretch, + float worldToScreenRatio) + { + StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; + + // values for non-uniform stretching with a rigid segment + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + // rigidSegment info in old non stretched pattern + ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; + ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? 
style.getStippleValue(style.rigidSegmentIdx) : 1.0f; + ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; + // stretch value for non rigid segments + ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); + // rigidSegment info to new stretched pattern + ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start + ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len + ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end + } + else + { + ret.nonRigidSegmentStretchValue = stretch; + } + + return ret; + } + + // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes + // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern + float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) + { + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment + ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment + ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment + ret *= stretch; + return ret; + } + else + { + return normalizedPlaceInPattern; + } + } + + float_t2 operator()(float_t t) + { + // basicaly 0.0 and 1.0 but with a guardband to discard outside the range + const float_t minT = 0.0 - 1.0; + const float_t maxT = 1.0 + 1.0; + + StyleAccessor styleAccessor = { style }; + const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; + const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * 
style.reciprocalStipplePatternLen); + + const float_t arcLen = arcLenCalc.calcArcLen(t); + const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); + float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); + uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); + + const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; + float_t2 ret = float_t2(InvalidT, InvalidT); + + // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections + const bool notInDrawSection = patternIdx & 0x1; + + // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections + float_t minDrawT = 0.0; + float_t maxDrawT = 1.0; + { + float_t normalizedPlaceInPatternBegin = frac(phaseShift); + normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); + uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); + const bool BeginInNonDrawSection = patternIdxBegin & 0x1; + + if (BeginInNonDrawSection) + { + float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; + diffToRightDrawableSection -= normalizedPlaceInPatternBegin; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? 
nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; + minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); + } + + // Completely in non-draw section -> clip away: + if (minDrawT >= 1.0) + return ret; + + const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); + const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); + float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); + uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); + const bool EndInNonDrawSection = patternIdxEnd & 0x1; + + if (EndInNonDrawSection) + { + float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; + maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); + } + } + + if (notInDrawSection) + { + float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + + float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; + const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; + float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); + t0 = clamp(t0, minDrawT, maxDrawT); + + float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; + diffToRightDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; + const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; + float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); + t1 = clamp(t1, minDrawT, maxDrawT); + + ret = float_t2(t0, t1); + } + else + { + t = clamp(t, minDrawT, maxDrawT); + ret = float_t2(t, t); + } + + return ret; + } + + LineStyle style; + CurveType curve; + typename CurveType::ArcLengthCalculator arcLenCalc; + float phaseShift; + float stretch; + float worldToScreenRatio; + // precomp value for non uniform stretching + float rigidSegmentStart; + float rigidSegmentEnd; + float rigidSegmentLen; + float nonRigidSegmentStretchValue; +}; + +typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; +typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; + +template +struct DefaultClipper +{ + using float_t2 = vector; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; + + static DefaultClipper construct() + { + DefaultClipper ret; + return ret; + } + + inline float_t2 operator()(const float_t t) + { + const float_t ret = clamp(t, 0.0, 1.0); + return float_t2(ret, ret); + } +}; + +template > +struct ClippedSignedDistance +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + + const static float_t sdf(CurveType 
curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) + { + typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); + + const float_t InvalidT = nbl::hlsl::numeric_limits::max; + // TODO: Fix and test, we're not working with squared distance anymore + const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? + + bool clipped = false; + float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; + float_t closestT = InvalidT; + [[unroll(CurveType::MaxCandidates)]] + for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) + { + const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); + if (candidateDistanceSquared < closestDistanceSquared) + { + float_t2 snappedTs = clipper(candidates[i]); + + if (snappedTs[0] == InvalidT) + { + continue; + } + + if (snappedTs[0] != candidates[i]) + { + // left snapped or clamped + const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); + if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[0]; + closestDistanceSquared = leftSnappedCandidateDistanceSquared; + } + + if (snappedTs[0] != snappedTs[1]) + { + // right snapped or clamped + const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); + if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[1]; + closestDistanceSquared = rightSnappedCandidateDistanceSquared; + } + } + } + else + { + // no snapping + if (candidateDistanceSquared < closestDistanceSquared) + { + clipped = false; + closestT = candidates[i]; + closestDistanceSquared = candidateDistanceSquared; + } + } + } + } + + + float_t roundedDistance = closestDistanceSquared - thickness; + if (!isRoadStyle) + { + return roundedDistance; + } + else + { + const float_t aaWidth = 
globals.antiAliasingFactor; + float_t rectCappedDistance = roundedDistance; + + if (clipped) + { + float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); + rectCappedDistance = capSquare(q, thickness, aaWidth); + } + + return rectCappedDistance; + } + } + + static float capSquare(float_t2 q, float_t th, float_t aaWidth) + { + float_t2 d = abs(q) - float_t2(aaWidth, th); + return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); + } +}; + +namespace dtm +{ + +// for usage in upper_bound function +struct DTMSettingsHeightsAccessor +{ + DTMHeightShadingSettings settings; + using value_type = float; + + float operator[](const uint32_t ix) + { + return settings.heightColorMapHeights[ix]; + } +}; + +float dot2(in float2 vec) +{ + return dot(vec, vec); +} + +// TODO: Later move these functions and structs to dtmSettings.hlsl and a namespace like dtmSettings::height_shading or dtmSettings::contours, etc.. +struct HeightSegmentTransitionData +{ + float currentHeight; + float4 currentSegmentColor; + float boundaryHeight; + float4 otherSegmentColor; +}; + +// This function interpolates between the current and nearest segment colors based on the +// screen-space distance to the segment boundary. The result is a smoothly blended color +// useful for visualizing discrete height levels without harsh edges. +float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv) +{ + float pxDistanceToNearestSegment = abs((transitionInfo.currentHeight - transitionInfo.boundaryHeight) / heightDeriv); + float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); + float4 localHeightColor = lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, nearestSegmentColorCoverage); + return localHeightColor; +} + +// Computes the continuous position of a height value within uniform intervals. 
+// flooring this value will give the interval index +// +// If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the +// first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. +// Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans +// [minHeight, minHeight + intervalLength]. +// +// Parameters: +// - height: The height value to classify. +// - minHeight: The reference starting height for interval calculation. +// - intervalLength: The length of each interval segment. +// - isCenteredShading: Whether to center the shading intervals around minHeight. +// +// Returns: +// - A float representing the continuous position within the interval grid. +float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading) +{ + if (isCenteredShading) + return ((height - minHeight) / intervalLength + 0.5f); + else + return ((height - minHeight) / intervalLength); +} + +void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight) +{ + float minShadingHeight = settings.heightColorMapHeights[0]; + float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier; + + if (settings.isCenteredShading) + outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * settings.intervalLength; + else + outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength; + + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u); + uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = 
settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + if (upperBoundHeight == lowerBoundHeight) + { + outIntervalColor = upperBoundColor; + } + else + { + float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + outIntervalColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + } +} + +float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) +{ + float denom = (v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y); + float u = ((v2.y - v3.y) * (p.x - v3.x) + (v3.x - v2.x) * (p.y - v3.y)) / denom; + float v = ((v3.y - v1.y) * (p.x - v3.x) + (v1.x - v3.x) * (p.y - v3.y)) / denom; + float w = 1.0 - u - v; + return float3(u, v, w); +} + +float4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) +{ + float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + + // HEIGHT SHADING + const uint32_t heightMapSize = settings.heightColorEntryCount; + float minShadingHeight = settings.heightColorMapHeights[0]; + float maxShadingHeight = settings.heightColorMapHeights[heightMapSize - 1]; + + if (heightMapSize > 0) + { + // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez + float2 e0 = v[1] - v[0]; + float2 e1 = v[2] - v[1]; + float2 e2 = v[0] - v[2]; + + float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); + float2 v0 = fragPos - v[0]; + float2 v1 = fragPos - v[1]; + float2 v2 = fragPos - v[2]; + + float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0))); + float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); + float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, e2) / dot(e2, e2))); + + float line0Sdf = distanceToLine0 * triangleAreaSign * sign(v0.x * e0.y - v0.y * 
e0.x); + float line1Sdf = distanceToLine1 * triangleAreaSign * sign(v1.x * e1.y - v1.y * e1.x); + float line2Sdf = distanceToLine2 * triangleAreaSign * sign(v2.x * e2.y - v2.y * e2.x); + float line3Sdf = (minShadingHeight - height) / heightDeriv; + float line4Sdf = (height - maxShadingHeight) / heightDeriv; + + float convexPolygonSdf = max(line0Sdf, line1Sdf); + convexPolygonSdf = max(convexPolygonSdf, line2Sdf); + convexPolygonSdf = max(convexPolygonSdf, line3Sdf); + convexPolygonSdf = max(convexPolygonSdf, line4Sdf); + + outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor + globals.antiAliasingFactor, convexPolygonSdf); + + // calculate height color + E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode(); + if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + int mapIndex = max(upperBoundIndex - 1, 0); + int mapIndexPrev = max(mapIndex - 1, 0); + int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); + + // logic explainer: if colorIdx is 0.0 then it means blend with next + // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true + // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values + bool blendWithPrev = (mapIndex > 0) + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex])); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex]; + transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext]; + transitionInfo.otherSegmentColor = blendWithPrev ? 
settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext]; + + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; + } + else if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + { + float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading); + float positionWithinInterval = frac(intervalPosition); + int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); + + float4 currentIntervalColor; + float currentIntervalHeight; + getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight); + + bool blendWithPrev = (positionWithinInterval < 0.5f); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = currentIntervalColor; + if (blendWithPrev) + { + int prevIntervalIdx = max(intervalIndex - 1, 0); + float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev + getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight); + transitionInfo.boundaryHeight = currentIntervalHeight; + } + else + { + int nextIntervalIdx = intervalIndex + 1; + getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); + } + + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; + } + else if (mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); + uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + float interpolationVal; + if (upperBoundHeightIndex == 0) + interpolationVal = 1.0f; + else + interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + + float4 localHeightColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + + outputColor.a *= localHeightColor.a; + outputColor.rgb = localHeightColor.rgb * outputColor.a + outputColor.rgb * (1.0f - outputColor.a); + } + } + + return outputColor; +} + +float4 calculateDTMContourColor(in DTMContourSettings contourSettings, in float3 v[3], in float2 fragPos, in float height) +{ + float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + + LineStyle contourStyle = loadLineStyle(contourSettings.contourLineStyleIdx); + const float contourThickness = (contourStyle.screenSpaceLineWidth + contourStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; + float stretch = 1.0f; + float phaseShift = 0.0f; + + // TODO: move to ubo or push constants + const float startHeight = contourSettings.contourLinesStartHeight; + const float endHeight = contourSettings.contourLinesEndHeight; + const float interval = contourSettings.contourLinesHeightInterval; + + // TODO: can be precomputed + const int maxContourLineIdx = (endHeight - startHeight) / interval; + + // TODO: it actually can output a negative number, fix + int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight) / interval + 0.5f); + contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); + float contourLineHeight = startHeight + interval * contourLineIdx; + + int contourLinePointsIdx = 0; + float2 contourLinePoints[2]; + // TODO: case 
where heights we are looking for are on all three vertices + for (int i = 0; i < 3; ++i) + { + if (contourLinePointsIdx == 2) + break; + + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + if (p1.z < p0.z) + nbl::hlsl::swap(p0, p1); + + float minHeight = p0.z; + float maxHeight = p1.z; + + if (height >= minHeight && height <= maxHeight) + { + float2 edge = float2(p1.x, p1.y) - float2(p0.x, p0.y); + float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight); + + contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y); + ++contourLinePointsIdx; + } + } + + if (contourLinePointsIdx == 2) + { + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); + + float distance = nbl::hlsl::numeric_limits::max; + if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag); + } + else + { + // TODO: + // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! 
+ // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag, clipper); + } + + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, distance); + outputColor.a *= contourStyle.color.a; + outputColor.rgb = contourStyle.color.rgb; + + return outputColor; + } + + return float4(0.0f, 0.0f, 0.0f, 0.0f); +} + +float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in float2 fragPos, in float3 baryCoord, in float height) +{ + float4 outputColor; + + LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); + const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; + const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); + const float stretch = 1.0f; + + // index of vertex opposing an edge, needed for calculation of triangle heights + uint opposingVertexIdx[3]; + opposingVertexIdx[0] = 2; + opposingVertexIdx[1] = 0; + opposingVertexIdx[2] = 1; + + float minDistance = nbl::hlsl::numeric_limits::max; + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + for (int i = 0; i < 3; ++i) + { + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + distance = ClippedSignedDistance >::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); + + minDistance = min(minDistance, distance); + } + } 
+ else + { + for (int i = 0; i < 3; ++i) + { + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + // long story short, in order for stipple patterns to be consistent: + // - point with lesser x coord should be starting point + // - if x coord of both points are equal then point with lesser y value should be starting point + if (p1.x < p0.x) + nbl::hlsl::swap(p0, p1); + else if (p1.x == p0.x && p1.y < p0.y) + nbl::hlsl::swap(p0, p1); + + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + + } + + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); + outputColor.a *= outlineStyle.color.a; + outputColor.rgb = outlineStyle.color.rgb; + + return outputColor; +} + +float4 blendUnder(in float4 srcColor, in float4 dstColor) +{ + dstColor.rgb = dstColor.rgb * dstColor.a + (1 - dstColor.a) * srcColor.a * srcColor.rgb; + dstColor.a = (1.0f - srcColor.a) * dstColor.a + srcColor.a; + + return dstColor; +} +} + +#endif \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 31c25a6e5..6c579dff6 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -1,5 +1,6 @@ #define FRAGMENT_SHADER_INPUT #include "common.hlsl" +#include "dtm.hlsl" #include #include #include @@ -8,294 +9,7 @@ #include #include 
#include -#include - -template -struct DefaultClipper -{ - using float_t2 = vector; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; - - static DefaultClipper construct() - { - DefaultClipper ret; - return ret; - } - - inline float_t2 operator()(const float_t t) - { - const float_t ret = clamp(t, 0.0, 1.0); - return float_t2(ret, ret); - } -}; - -// for usage in upper_bound function -struct StyleAccessor -{ - LineStyle style; - using value_type = float; - - float operator[](const uint32_t ix) - { - return style.getStippleValue(ix); - } -}; - -template -struct StyleClipper -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; - - static StyleClipper construct( - LineStyle style, - CurveType curve, - typename CurveType::ArcLengthCalculator arcLenCalc, - float phaseShift, - float stretch, - float worldToScreenRatio) - { - StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; - - // values for non-uniform stretching with a rigid segment - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - // rigidSegment info in old non stretched pattern - ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; - ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? 
style.getStippleValue(style.rigidSegmentIdx) : 1.0f; - ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; - // stretch value for non rigid segments - ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); - // rigidSegment info to new stretched pattern - ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start - ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len - ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end - } - else - { - ret.nonRigidSegmentStretchValue = stretch; - } - - return ret; - } - - // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes - // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern - float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) - { - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment - ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment - ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment - ret *= stretch; - return ret; - } - else - { - return normalizedPlaceInPattern; - } - } - - float_t2 operator()(float_t t) - { - // basicaly 0.0 and 1.0 but with a guardband to discard outside the range - const float_t minT = 0.0 - 1.0; - const float_t maxT = 1.0 + 1.0; - - StyleAccessor styleAccessor = { style }; - const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; - const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * 
style.reciprocalStipplePatternLen); - - const float_t arcLen = arcLenCalc.calcArcLen(t); - const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); - float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); - uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); - - const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; - float_t2 ret = float_t2(InvalidT, InvalidT); - - // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections - const bool notInDrawSection = patternIdx & 0x1; - - // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections - float_t minDrawT = 0.0; - float_t maxDrawT = 1.0; - { - float_t normalizedPlaceInPatternBegin = frac(phaseShift); - normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); - uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); - const bool BeginInNonDrawSection = patternIdxBegin & 0x1; - - if (BeginInNonDrawSection) - { - float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; - diffToRightDrawableSection -= normalizedPlaceInPatternBegin; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? 
nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; - minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); - } - - // Completely in non-draw section -> clip away: - if (minDrawT >= 1.0) - return ret; - - const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); - const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); - float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); - uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); - const bool EndInNonDrawSection = patternIdxEnd & 0x1; - - if (EndInNonDrawSection) - { - float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; - maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); - } - } - - if (notInDrawSection) - { - float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - - float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; - const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; - float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); - t0 = clamp(t0, minDrawT, maxDrawT); - - float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; - diffToRightDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; - const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; - float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); - t1 = clamp(t1, minDrawT, maxDrawT); - - ret = float_t2(t0, t1); - } - else - { - t = clamp(t, minDrawT, maxDrawT); - ret = float_t2(t, t); - } - - return ret; - } - - LineStyle style; - CurveType curve; - typename CurveType::ArcLengthCalculator arcLenCalc; - float phaseShift; - float stretch; - float worldToScreenRatio; - // precomp value for non uniform stretching - float rigidSegmentStart; - float rigidSegmentEnd; - float rigidSegmentLen; - float nonRigidSegmentStretchValue; -}; - -template > -struct ClippedSignedDistance -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - - const static float_t sdf(CurveType curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) - { - typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); - - const float_t InvalidT = nbl::hlsl::numeric_limits::max; - // TODO: Fix and test, we're not working with squared distance anymore - const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? 
- - bool clipped = false; - float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; - float_t closestT = InvalidT; - [[unroll(CurveType::MaxCandidates)]] - for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) - { - const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); - if (candidateDistanceSquared < closestDistanceSquared) - { - float_t2 snappedTs = clipper(candidates[i]); - - if (snappedTs[0] == InvalidT) - { - continue; - } - - if (snappedTs[0] != candidates[i]) - { - // left snapped or clamped - const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); - if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[0]; - closestDistanceSquared = leftSnappedCandidateDistanceSquared; - } - - if (snappedTs[0] != snappedTs[1]) - { - // right snapped or clamped - const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); - if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[1]; - closestDistanceSquared = rightSnappedCandidateDistanceSquared; - } - } - } - else - { - // no snapping - if (candidateDistanceSquared < closestDistanceSquared) - { - clipped = false; - closestT = candidates[i]; - closestDistanceSquared = candidateDistanceSquared; - } - } - } - } - - - float_t roundedDistance = closestDistanceSquared - thickness; - if(!isRoadStyle) - { - return roundedDistance; - } - else - { - const float_t aaWidth = globals.antiAliasingFactor; - float_t rectCappedDistance = roundedDistance; - - if (clipped) - { - float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); - rectCappedDistance = capSquare(q, thickness, aaWidth); - } - - return rectCappedDistance; - } - } - - static float capSquare(float_t2 q, float_t th, float_t aaWidth) - { - float_t2 d = abs(q) - float_t2(aaWidth, th); - return length(max(d, 0.0)) + 
min(max(d.x, d.y), 0.0); - } -}; +//#include // sdf of Isosceles Trapezoid y-aligned by https://iquilezles.org/articles/distfunctions2d/ float sdTrapezoid(float2 p, float r1, float r2, float he) @@ -331,21 +45,6 @@ float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb return sdTrapezoid(p, ra, rb, h); } -typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; -typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; - -// for usage in upper_bound function -struct DTMSettingsHeightsAccessor -{ - DTMHeightShadingSettings settings; - using value_type = float; - - float operator[](const uint32_t ix) - { - return settings.heightColorMapHeights[ix]; - } -}; - // We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm // because there is no `if constexpr` in hlsl // @params @@ -419,401 +118,6 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } -float dot2(in float2 vec) -{ - return dot(vec, vec); -} - - -// TODO: Later move these functions and structs to dtmSettings.hlsl and a namespace like dtmSettings::height_shading or dtmSettings::contours, etc.. - -struct HeightSegmentTransitionData -{ - float currentHeight; - float4 currentSegmentColor; - float boundaryHeight; - float4 otherSegmentColor; -}; - -// This function interpolates between the current and nearest segment colors based on the -// screen-space distance to the segment boundary. The result is a smoothly blended color -// useful for visualizing discrete height levels without harsh edges. 
-float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv) -{ - float pxDistanceToNearestSegment = abs((transitionInfo.currentHeight - transitionInfo.boundaryHeight) / heightDeriv); - float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); - float4 localHeightColor = lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, nearestSegmentColorCoverage); - return localHeightColor; -} - -// Computes the continuous position of a height value within uniform intervals. -// flooring this value will give the interval index -// -// If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the -// first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. -// Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans -// [minHeight, minHeight + intervalLength]. -// -// Parameters: -// - height: The height value to classify. -// - minHeight: The reference starting height for interval calculation. -// - intervalLength: The length of each interval segment. -// - isCenteredShading: Whether to center the shading intervals around minHeight. -// -// Returns: -// - A float representing the continuous position within the interval grid. 
-float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading) -{ - if (isCenteredShading) - return ( (height - minHeight) / intervalLength + 0.5f); - else - return ( (height - minHeight) / intervalLength ); -} - -void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight) -{ - float minShadingHeight = settings.heightColorMapHeights[0]; - float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier; - - if (settings.isCenteredShading) - outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * settings.intervalLength; - else - outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength; - - DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; - uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount-1u); - uint32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); - - float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; - - float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; - - if (upperBoundHeight == lowerBoundHeight) - { - outIntervalColor = upperBoundColor; - } - else - { - float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - outIntervalColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); - } -} - -float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) -{ - float denom = (v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y); - float u = ((v2.y - v3.y) * (p.x - v3.x) + (v3.x - v2.x) * (p.y - 
v3.y)) / denom; - float v = ((v3.y - v1.y) * (p.x - v3.x) + (v1.x - v3.x) * (p.y - v3.y)) / denom; - float w = 1.0 - u - v; - return float3(u, v, w); -} - -float4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 v[3], in float heightDeriv, in float2 fragPos, in float height) -{ - float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); - - // HEIGHT SHADING - const uint32_t heightMapSize = settings.heightColorEntryCount; - float minShadingHeight = settings.heightColorMapHeights[0]; - float maxShadingHeight = settings.heightColorMapHeights[heightMapSize - 1]; - - if (heightMapSize > 0) - { - // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez - float2 e0 = v[1] - v[0]; - float2 e1 = v[2] - v[1]; - float2 e2 = v[0] - v[2]; - - float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); - float2 v0 = fragPos - v[0]; - float2 v1 = fragPos - v[1]; - float2 v2 = fragPos - v[2]; - - float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0))); - float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); - float distanceToLine2 = sqrt(dot2(v2 - e2 * dot(v2, e2) / dot(e2, e2))); - - float line0Sdf = distanceToLine0 * triangleAreaSign * (v0.x * e0.y - v0.y * e0.x); - float line1Sdf = distanceToLine1 * triangleAreaSign * (v1.x * e1.y - v1.y * e1.x); - float line2Sdf = distanceToLine2 * triangleAreaSign * (v2.x * e2.y - v2.y * e2.x); - float line3Sdf = (minShadingHeight - height) / heightDeriv; - float line4Sdf = (height - maxShadingHeight) / heightDeriv; - - float convexPolygonSdf = max(line0Sdf, line1Sdf); - convexPolygonSdf = max(convexPolygonSdf, line2Sdf); - convexPolygonSdf = max(convexPolygonSdf, line3Sdf); - convexPolygonSdf = max(convexPolygonSdf, line4Sdf); - - outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor * 2.0f, convexPolygonSdf); - - // calculate height color - E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode(); - - if (mode == 
E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) - { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; - int upperBoundIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - int mapIndex = max(upperBoundIndex - 1, 0); - int mapIndexPrev = max(mapIndex - 1, 0); - int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); - - // logic explainer: if colorIdx is 0.0 then it means blend with next - // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true - // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values - bool blendWithPrev = (mapIndex > 0) - && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex])); - - HeightSegmentTransitionData transitionInfo; - transitionInfo.currentHeight = height; - transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex]; - transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext]; - transitionInfo.otherSegmentColor = blendWithPrev ? 
settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext]; - - float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); - outputColor.rgb = localHeightColor.rgb; - outputColor.a *= localHeightColor.a; - } - else if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) - { - float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading); - float positionWithinInterval = frac(intervalPosition); - int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); - - float4 currentIntervalColor; - float currentIntervalHeight; - getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight); - - bool blendWithPrev = (positionWithinInterval < 0.5f); - - HeightSegmentTransitionData transitionInfo; - transitionInfo.currentHeight = height; - transitionInfo.currentSegmentColor = currentIntervalColor; - if (blendWithPrev) - { - int prevIntervalIdx = max(intervalIndex - 1, 0); - float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev - getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight); - transitionInfo.boundaryHeight = currentIntervalHeight; - } - else - { - int nextIntervalIdx = intervalIndex + 1; - getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); - } - - float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); - outputColor.rgb = localHeightColor.rgb; - outputColor.a *= localHeightColor.a; - } - else if (mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) - { - DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; - uint32_t upperBoundHeightIndex = nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, heightMapSize, height); - uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; - - float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; - float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; - - float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; - float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; - - float interpolationVal; - if (upperBoundHeightIndex == 0) - interpolationVal = 1.0f; - else - interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); - - float4 localHeightColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); - - outputColor.a *= localHeightColor.a; - outputColor.rgb = localHeightColor.rgb * outputColor.a + outputColor.rgb * (1.0f - outputColor.a); - } - } - - return outputColor; -} - -float4 calculateDTMContourColor(in DTMContourSettings contourSettings, in float3 v[3], in uint2 edgePoints[3], in float2 fragPos, in float height) -{ - float4 outputColor; - - LineStyle contourStyle = loadLineStyle(contourSettings.contourLineStyleIdx); - const float contourThickness = (contourStyle.screenSpaceLineWidth + contourStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; - float stretch = 1.0f; - float phaseShift = 0.0f; - - // TODO: move to ubo or push constants - const float startHeight = contourSettings.contourLinesStartHeight; - const float endHeight = contourSettings.contourLinesEndHeight; - const float interval = contourSettings.contourLinesHeightInterval; - - // TODO: can be precomputed - const int maxContourLineIdx = (endHeight - startHeight + 1) / interval; - - // TODO: it actually can output a negative number, fix - int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight + (interval * 0.5f)) / interval); - contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); - float contourLineHeight = startHeight + interval * contourLineIdx; - - int contourLinePointsIdx = 0; - float2 contourLinePoints[2]; - // 
TODO: case where heights we are looking for are on all three vertices - for (int i = 0; i < 3; ++i) - { - if (contourLinePointsIdx == 2) - break; - - const uint2 currentEdgePoints = edgePoints[i]; - float3 p0 = v[currentEdgePoints[0]]; - float3 p1 = v[currentEdgePoints[1]]; - - if (p1.z < p0.z) - nbl::hlsl::swap(p0, p1); - - float minHeight = p0.z; - float maxHeight = p1.z; - - if (height >= minHeight && height <= maxHeight) - { - float2 edge = float2(p1.x, p1.y) - float2(p0.x, p0.y); - float scale = (contourLineHeight - minHeight) / (maxHeight - minHeight); - - contourLinePoints[contourLinePointsIdx] = scale * edge + float2(p0.x, p0.y); - ++contourLinePointsIdx; - } - } - - // TODO: comment next line to fix, figure if it was needed - if(contourLinePointsIdx == 2) - { - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); - - float distance = nbl::hlsl::numeric_limits::max; - if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag); - } - else - { - // TODO: - // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! 
- // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag, clipper); - } - - outputColor.a = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance) * contourStyle.color.a; - outputColor.rgb = contourStyle.color.rgb; - - return outputColor; - } - - return float4(0.0f, 0.0f, 0.0f, 0.0f); -} - -float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in uint2 edgePoints[3], in float2 fragPos, in float3 baryCoord, in float height) -{ - float4 outputColor; - - LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); - const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; - const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); - const float stretch = 1.0f; - - // index of vertex opposing an edge, needed for calculation of triangle heights - uint opposingVertexIdx[3]; - opposingVertexIdx[0] = 2; - opposingVertexIdx[1] = 0; - opposingVertexIdx[2] = 1; - - // find sdf of every edge - float triangleAreaTimesTwo; - { - float3 AB = v[0] - v[1]; - float3 AC = v[0] - v[2]; - AB.z = 0.0f; - AC.z = 0.0f; - - // TODO: figure out if there is a faster solution - triangleAreaTimesTwo = length(cross(AB, AC)); - } - - // calculate sdf of every edge as it wasn't stippled - float distances[3]; - for (int i = 0; i < 3; ++i) - { - const uint2 currentEdgePoints = edgePoints[i]; - float3 A = v[currentEdgePoints[0]]; - float3 B = v[currentEdgePoints[1]]; - float3 AB = B - A; - float ABLen = length(AB); - float 
triangleHeightToOpositeVertex = triangleAreaTimesTwo / ABLen; - - distances[i] = abs(triangleHeightToOpositeVertex * baryCoord[opposingVertexIdx[i]]); - } - - float minDistance = nbl::hlsl::numeric_limits::max; - if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) - { - for (int i = 0; i < 3; ++i) - { - const uint2 currentEdgePoints = edgePoints[i]; - float3 p0 = v[currentEdgePoints[0]]; - float3 p1 = v[currentEdgePoints[1]]; - - float distance = nbl::hlsl::numeric_limits::max; - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); - distance = ClippedSignedDistance >::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); - - minDistance = min(minDistance, distance); - } - } - else - { - for (int i = 0; i < 3; ++i) - { - const uint2 currentEdgePoints = edgePoints[i]; - float3 p0 = v[currentEdgePoints[0]]; - float3 p1 = v[currentEdgePoints[1]]; - - // long story short, in order for stipple patterns to be consistent: - // - point with lesser x coord should be starting point - // - if x coord of both points are equal then point with lesser y value should be starting point - if (p1.x < p0.x) - nbl::hlsl::swap(p0, p1); - else if (p1.x == p0.x && p1.y < p0.y) - nbl::hlsl::swap(p0, p1); - - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); - - float distance = nbl::hlsl::numeric_limits::max; - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); - - minDistance = min(minDistance, distance); - } - - } - - outputColor.a = 
smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, minDistance) * outlineStyle.color.a; - outputColor.rgb = outlineStyle.color.rgb; - - return outputColor; -} - -float4 blendColorOnTop(in float4 colorBelow, in float4 colorAbove) -{ - float4 outputColor = colorBelow; - outputColor.rgb = colorAbove.rgb * colorAbove.a + outputColor.rgb * outputColor.a * (1.0f - colorAbove.a); - outputColor.a = colorAbove.a + outputColor.a * (1.0f - colorAbove.a); - - return outputColor; -} - [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -834,30 +138,25 @@ float4 fragMain(PSInput input) : SV_TARGET v[1] = input.getScreenSpaceVertexAttribs(1); v[2] = input.getScreenSpaceVertexAttribs(2); - // indices of points constructing every edge - uint2 edgePoints[3]; - edgePoints[0] = uint2(0, 1); - edgePoints[1] = uint2(1, 2); - edgePoints[2] = uint2(2, 0); - - const float3 baryCoord = calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; float heightDeriv = fwidth(height); float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dtmSettings.drawHeightShadingEnabled()) - dtmColor = blendColorOnTop(dtmColor, calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); if (dtmSettings.drawContourEnabled()) { for(uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder - dtmColor = blendColorOnTop(dtmColor, calculateDTMContourColor(dtmSettings.contourSettings[i], v, edgePoints, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, 
dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], v, input.position.xy, height)); } if (dtmSettings.drawOutlineEnabled()) - dtmColor = blendColorOnTop(dtmColor, calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, edgePoints, input.position.xy, baryCoord, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, input.position.xy, baryCoord, height)); textureColor = dtmColor.rgb; localAlpha = dtmColor.a; - + gammaUncorrect(textureColor); // want to output to SRGB without gamma correction return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); } @@ -924,7 +223,7 @@ float4 fragMain(PSInput input) : SV_TARGET input.getPolylineConnectorTrapezoidShortBase()); } - localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); + localAlpha = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, distance); } else if (objType == ObjectType::CURVE_BOX) { @@ -1042,7 +341,7 @@ float4 fragMain(PSInput input) : SV_TARGET // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); - localAlpha *= smoothstep(+globals.antiAliasingFactor / 2.0, -globals.antiAliasingFactor / 2.0f, msdf); + localAlpha *= 1.0f - smoothstep(-globals.antiAliasingFactor / 2.0f, globals.antiAliasingFactor / 2.0f, msdf); } } else if (objType == ObjectType::FONT_GLYPH) @@ -1073,7 +372,7 @@ float4 fragMain(PSInput input) : SV_TARGET LineStyle style = loadLineStyle(mainObj.styleIdx); const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; const float bolden = style.worldSpaceLineWidth * screenPxRange; 
// worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle - localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf); + localAlpha = 1.0f - smoothstep(-globals.antiAliasingFactor / 2.0f + bolden, globals.antiAliasingFactor / 2.0f + bolden, msdf); } } else if (objType == ObjectType::IMAGE) From 8a771a64002c6f2e2b3af035227ed18fe3947477 Mon Sep 17 00:00:00 2001 From: Przemek Date: Wed, 23 Apr 2025 11:04:00 +0200 Subject: [PATCH 057/129] Fixed blending --- 62_CAD/main.cpp | 2 +- 62_CAD/shaders/main_pipeline/dtm.hlsl | 4 ++-- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 9 +++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 9f5392d4b..822f1b448 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3244,7 +3244,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmInfo{}; - dtmInfo.mode |= E_DTM_MODE::OUTLINE; + //dtmInfo.mode |= E_DTM_MODE::OUTLINE; dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; dtmInfo.mode |= E_DTM_MODE::CONTOUR; diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl index 5b41eabb3..466b74ee9 100644 --- a/62_CAD/shaders/main_pipeline/dtm.hlsl +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -666,9 +666,9 @@ float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in return outputColor; } -float4 blendUnder(in float4 srcColor, in float4 dstColor) +float4 blendUnder(in float4 dstColor, in float4 srcColor) { - dstColor.rgb = dstColor.rgb * dstColor.a + (1 - dstColor.a) * srcColor.a * srcColor.rgb; + dstColor.rgb = dstColor.rgb + (1 - dstColor.a) * srcColor.a * srcColor.rgb; dstColor.a = (1.0f - srcColor.a) * dstColor.a + srcColor.a; return dstColor; diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl 
index 6c579dff6..3ac219a66 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -144,15 +144,16 @@ float4 fragMain(PSInput input) : SV_TARGET float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dtmSettings.drawHeightShadingEnabled()) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); + if (dtmSettings.drawOutlineEnabled()) + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, input.position.xy, baryCoord, height)); if (dtmSettings.drawContourEnabled()) { for(uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], v, input.position.xy, height)); } - if (dtmSettings.drawOutlineEnabled()) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, input.position.xy, baryCoord, height)); + if (dtmSettings.drawHeightShadingEnabled()) + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); + textureColor = dtmColor.rgb; localAlpha = dtmColor.a; From fa5d7f1e2ea73767e36319d4e40d704855ceb16d Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 24 Apr 2025 14:05:45 +0200 Subject: [PATCH 058/129] Added `line_style.hlsl` file --- 62_CAD/main.cpp | 42 ++- 62_CAD/shaders/main_pipeline/dtm.hlsl | 296 +----------------- 62_CAD/shaders/main_pipeline/line_style.hlsl | 297 +++++++++++++++++++ 3 files changed, 327 insertions(+), 308 deletions(-) create mode 100644 62_CAD/shaders/main_pipeline/line_style.hlsl diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 822f1b448..6ca03d9d6 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3239,12 +3239,28 @@ class ComputerAidedDesign final 
: public examples::SimpleWindowedApplication, pu 0, 1, 2 };*/ + // HOURGLASS + /*core::vector vertices = { + { float64_t2(0.0, 0.0), 10.0 }, + { float64_t2(-200.0, -200.0), 90.0 }, + { float64_t2(200.0, -200.0), 80.0 }, + + { float64_t2(0.0, 0.0), 10.0 }, + { float64_t2(200.0, 200.0), 90.0 }, + { float64_t2(-200.0, 200.0), 80.0 }, + }; + + core::vector indices = { + 0, 1, 2, + 3, 4, 5 + };*/ + CTriangleMesh mesh; mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmInfo{}; - //dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::OUTLINE; dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; dtmInfo.mode |= E_DTM_MODE::CONTOUR; @@ -3260,7 +3276,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.contourSettings[0u].heightInterval = 10; dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 1.0f; - dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 1.0f); std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); @@ -3282,7 +3298,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); @@ -3294,22 +3310,22 
@@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; dtmInfo.heightShadingInfo.isCenteredShading = false; dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 
animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); break; } diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl index 466b74ee9..63e1194e4 100644 --- a/62_CAD/shaders/main_pipeline/dtm.hlsl +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -1,300 +1,7 @@ #ifndef _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ #define _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ -#include -#include - -// TODO: functions outside of the "dtm" namespace need to be moved to another file - -// for usage in upper_bound function -struct StyleAccessor -{ - LineStyle style; - using value_type = float; - - float operator[](const uint32_t ix) - { - return style.getStippleValue(ix); - } -}; - -template -struct StyleClipper -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; - - static StyleClipper construct( - LineStyle style, - CurveType curve, - typename CurveType::ArcLengthCalculator arcLenCalc, - float phaseShift, - float stretch, - float worldToScreenRatio) - { - StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; - - // values for non-uniform stretching with a rigid segment - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - // rigidSegment info in old non stretched pattern - ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? 
style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; - ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? style.getStippleValue(style.rigidSegmentIdx) : 1.0f; - ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; - // stretch value for non rigid segments - ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); - // rigidSegment info to new stretched pattern - ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start - ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len - ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end - } - else - { - ret.nonRigidSegmentStretchValue = stretch; - } - - return ret; - } - - // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes - // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern - float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) - { - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment - ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment - ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment - ret *= stretch; - return ret; - } - else - { - return normalizedPlaceInPattern; - } - } - - float_t2 operator()(float_t t) - { - // basicaly 0.0 and 1.0 but with a guardband to discard outside the range - const float_t minT = 0.0 - 1.0; - const float_t maxT = 1.0 + 1.0; - - StyleAccessor styleAccessor = { style }; - const float_t 
reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; - const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * style.reciprocalStipplePatternLen); - - const float_t arcLen = arcLenCalc.calcArcLen(t); - const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); - float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); - uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); - - const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; - float_t2 ret = float_t2(InvalidT, InvalidT); - - // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections - const bool notInDrawSection = patternIdx & 0x1; - - // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections - float_t minDrawT = 0.0; - float_t maxDrawT = 1.0; - { - float_t normalizedPlaceInPatternBegin = frac(phaseShift); - normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); - uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); - const bool BeginInNonDrawSection = patternIdxBegin & 0x1; - - if (BeginInNonDrawSection) - { - float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; - diffToRightDrawableSection -= normalizedPlaceInPatternBegin; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? 
nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; - minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); - } - - // Completely in non-draw section -> clip away: - if (minDrawT >= 1.0) - return ret; - - const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); - const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); - float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); - uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); - const bool EndInNonDrawSection = patternIdxEnd & 0x1; - - if (EndInNonDrawSection) - { - float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; - maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); - } - } - - if (notInDrawSection) - { - float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - - float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; - const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; - float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); - t0 = clamp(t0, minDrawT, maxDrawT); - - float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; - diffToRightDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; - const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; - float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); - t1 = clamp(t1, minDrawT, maxDrawT); - - ret = float_t2(t0, t1); - } - else - { - t = clamp(t, minDrawT, maxDrawT); - ret = float_t2(t, t); - } - - return ret; - } - - LineStyle style; - CurveType curve; - typename CurveType::ArcLengthCalculator arcLenCalc; - float phaseShift; - float stretch; - float worldToScreenRatio; - // precomp value for non uniform stretching - float rigidSegmentStart; - float rigidSegmentEnd; - float rigidSegmentLen; - float nonRigidSegmentStretchValue; -}; - -typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; -typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; - -template -struct DefaultClipper -{ - using float_t2 = vector; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; - - static DefaultClipper construct() - { - DefaultClipper ret; - return ret; - } - - inline float_t2 operator()(const float_t t) - { - const float_t ret = clamp(t, 0.0, 1.0); - return float_t2(ret, ret); - } -}; - -template > -struct ClippedSignedDistance -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - - const static float_t sdf(CurveType 
curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) - { - typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); - - const float_t InvalidT = nbl::hlsl::numeric_limits::max; - // TODO: Fix and test, we're not working with squared distance anymore - const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? - - bool clipped = false; - float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; - float_t closestT = InvalidT; - [[unroll(CurveType::MaxCandidates)]] - for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) - { - const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); - if (candidateDistanceSquared < closestDistanceSquared) - { - float_t2 snappedTs = clipper(candidates[i]); - - if (snappedTs[0] == InvalidT) - { - continue; - } - - if (snappedTs[0] != candidates[i]) - { - // left snapped or clamped - const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); - if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[0]; - closestDistanceSquared = leftSnappedCandidateDistanceSquared; - } - - if (snappedTs[0] != snappedTs[1]) - { - // right snapped or clamped - const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); - if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[1]; - closestDistanceSquared = rightSnappedCandidateDistanceSquared; - } - } - } - else - { - // no snapping - if (candidateDistanceSquared < closestDistanceSquared) - { - clipped = false; - closestT = candidates[i]; - closestDistanceSquared = candidateDistanceSquared; - } - } - } - } - - - float_t roundedDistance = closestDistanceSquared - thickness; - if (!isRoadStyle) - { - return roundedDistance; - } - else - { - const float_t aaWidth = 
globals.antiAliasingFactor; - float_t rectCappedDistance = roundedDistance; - - if (clipped) - { - float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); - rectCappedDistance = capSquare(q, thickness, aaWidth); - } - - return rectCappedDistance; - } - } - - static float capSquare(float_t2 q, float_t th, float_t aaWidth) - { - float_t2 d = abs(q) - float_t2(aaWidth, th); - return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); - } -}; +#include "line_style.hlsl" namespace dtm { @@ -316,7 +23,6 @@ float dot2(in float2 vec) return dot(vec, vec); } -// TODO: Later move these functions and structs to dtmSettings.hlsl and a namespace like dtmSettings::height_shading or dtmSettings::contours, etc.. struct HeightSegmentTransitionData { float currentHeight; diff --git a/62_CAD/shaders/main_pipeline/line_style.hlsl b/62_CAD/shaders/main_pipeline/line_style.hlsl new file mode 100644 index 000000000..f50127667 --- /dev/null +++ b/62_CAD/shaders/main_pipeline/line_style.hlsl @@ -0,0 +1,297 @@ +#ifndef _CAD_EXAMPLE_LINE_STYLE_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_LINE_STYLE_HLSL_INCLUDED_ + +#include +#include + +// for usage in upper_bound function +struct StyleAccessor +{ + LineStyle style; + using value_type = float; + + float operator[](const uint32_t ix) + { + return style.getStippleValue(ix); + } +}; + +template +struct StyleClipper +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; + + static StyleClipper construct( + LineStyle style, + CurveType curve, + typename CurveType::ArcLengthCalculator arcLenCalc, + float phaseShift, + float stretch, + float worldToScreenRatio) + { + StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; + + // values for non-uniform stretching with a rigid segment + if 
(style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + // rigidSegment info in old non stretched pattern + ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; + ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? style.getStippleValue(style.rigidSegmentIdx) : 1.0f; + ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; + // stretch value for non rigid segments + ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); + // rigidSegment info to new stretched pattern + ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start + ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len + ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end + } + else + { + ret.nonRigidSegmentStretchValue = stretch; + } + + return ret; + } + + // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes + // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern + float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) + { + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment + ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment + ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment + ret *= stretch; + return ret; + } + else + { + return normalizedPlaceInPattern; + } + } + + float_t2 operator()(float_t t) + { + // basicaly 0.0 and 1.0 but with a 
guardband to discard outside the range + const float_t minT = 0.0 - 1.0; + const float_t maxT = 1.0 + 1.0; + + StyleAccessor styleAccessor = { style }; + const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; + const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * style.reciprocalStipplePatternLen); + + const float_t arcLen = arcLenCalc.calcArcLen(t); + const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); + float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); + uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); + + const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; + float_t2 ret = float_t2(InvalidT, InvalidT); + + // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections + const bool notInDrawSection = patternIdx & 0x1; + + // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections + float_t minDrawT = 0.0; + float_t maxDrawT = 1.0; + { + float_t normalizedPlaceInPatternBegin = frac(phaseShift); + normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); + uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); + const bool BeginInNonDrawSection = patternIdxBegin & 0x1; + + if (BeginInNonDrawSection) + { + float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 
1.0 : styleAccessor[patternIdxBegin]; + diffToRightDrawableSection -= normalizedPlaceInPatternBegin; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; + minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); + } + + // Completely in non-draw section -> clip away: + if (minDrawT >= 1.0) + return ret; + + const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); + const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); + float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); + uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); + const bool EndInNonDrawSection = patternIdxEnd & 0x1; + + if (EndInNonDrawSection) + { + float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; + maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); + } + } + + if (notInDrawSection) + { + float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + + float_t diffToLeftDrawableSection = (patternIdx == 0) ? 
0.0 : styleAccessor[patternIdx - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; + const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; + float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); + t0 = clamp(t0, minDrawT, maxDrawT); + + float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdx]; + diffToRightDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; + const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; + float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); + t1 = clamp(t1, minDrawT, maxDrawT); + + ret = float_t2(t0, t1); + } + else + { + t = clamp(t, minDrawT, maxDrawT); + ret = float_t2(t, t); + } + + return ret; + } + + LineStyle style; + CurveType curve; + typename CurveType::ArcLengthCalculator arcLenCalc; + float phaseShift; + float stretch; + float worldToScreenRatio; + // precomp value for non uniform stretching + float rigidSegmentStart; + float rigidSegmentEnd; + float rigidSegmentLen; + float nonRigidSegmentStretchValue; +}; + +typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; +typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; + +template +struct DefaultClipper +{ + using float_t2 = vector; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; + + static DefaultClipper construct() + { + DefaultClipper ret; + return ret; + } + + inline float_t2 operator()(const float_t t) + { + const float_t ret = clamp(t, 0.0, 1.0); + return float_t2(ret, ret); + } +}; + +template > +struct ClippedSignedDistance +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + + const static float_t sdf(CurveType 
curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) + { + typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); + + const float_t InvalidT = nbl::hlsl::numeric_limits::max; + // TODO: Fix and test, we're not working with squared distance anymore + const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? + + bool clipped = false; + float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; + float_t closestT = InvalidT; + [[unroll(CurveType::MaxCandidates)]] + for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) + { + const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); + if (candidateDistanceSquared < closestDistanceSquared) + { + float_t2 snappedTs = clipper(candidates[i]); + + if (snappedTs[0] == InvalidT) + { + continue; + } + + if (snappedTs[0] != candidates[i]) + { + // left snapped or clamped + const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); + if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[0]; + closestDistanceSquared = leftSnappedCandidateDistanceSquared; + } + + if (snappedTs[0] != snappedTs[1]) + { + // right snapped or clamped + const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); + if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[1]; + closestDistanceSquared = rightSnappedCandidateDistanceSquared; + } + } + } + else + { + // no snapping + if (candidateDistanceSquared < closestDistanceSquared) + { + clipped = false; + closestT = candidates[i]; + closestDistanceSquared = candidateDistanceSquared; + } + } + } + } + + + float_t roundedDistance = closestDistanceSquared - thickness; + if (!isRoadStyle) + { + return roundedDistance; + } + else + { + const float_t aaWidth = 
globals.antiAliasingFactor; + float_t rectCappedDistance = roundedDistance; + + if (clipped) + { + float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); + rectCappedDistance = capSquare(q, thickness, aaWidth); + } + + return rectCappedDistance; + } + } + + static float capSquare(float_t2 q, float_t th, float_t aaWidth) + { + float_t2 d = abs(q) - float_t2(aaWidth, th); + return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); + } +}; + +#endif \ No newline at end of file From 9ddaa810cf0bfc19130f66de25f43b1be3ad8092 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 25 Apr 2025 12:05:23 +0330 Subject: [PATCH 059/129] emulated float64 bug repro --- 62_CAD/main.cpp | 82 ++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 6ca03d9d6..e901d07c3 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -58,6 +58,7 @@ enum class ExampleMode CASE_7, // Images CASE_8, // MSDF and Text CASE_9, // DTM + CASE_BUG, // Bug Repro CASE_COUNT }; @@ -72,10 +73,11 @@ constexpr std::array cameraExtents = 10.0, // CASE_6 10.0, // CASE_7 600.0, // CASE_8 - 600.0 // CASE_9 + 600.0, // CASE_9 + 10.0 // CASE_BUG }; -constexpr ExampleMode mode = ExampleMode::CASE_9; +constexpr ExampleMode mode = ExampleMode::CASE_BUG; class Camera2D { @@ -3239,28 +3241,12 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu 0, 1, 2 };*/ - // HOURGLASS - /*core::vector vertices = { - { float64_t2(0.0, 0.0), 10.0 }, - { float64_t2(-200.0, -200.0), 90.0 }, - { float64_t2(200.0, -200.0), 80.0 }, - - { float64_t2(0.0, 0.0), 10.0 }, - { float64_t2(200.0, 200.0), 90.0 }, - { float64_t2(-200.0, 200.0), 80.0 }, - }; - - core::vector indices = { - 0, 1, 2, - 3, 4, 5 - };*/ - CTriangleMesh mesh; mesh.setVertices(std::move(vertices)); mesh.setIndices(std::move(indices)); DTMSettingsInfo dtmInfo{}; - dtmInfo.mode |= E_DTM_MODE::OUTLINE; + //dtmInfo.mode |= 
E_DTM_MODE::OUTLINE; dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; dtmInfo.mode |= E_DTM_MODE::CONTOUR; @@ -3276,7 +3262,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.contourSettings[0u].heightInterval = 10; dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 1.0f; - dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 1.0f); + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); @@ -3298,7 +3284,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); @@ -3310,22 +3296,22 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; dtmInfo.heightShadingInfo.isCenteredShading = false; dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0f)); - 
dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: { dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; - dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, 1.0f)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); break; } @@ -3343,6 +3329,40 @@ class 
ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); } + else if (mode == ExampleMode::CASE_BUG) + { + CPolyline polyline; + + LineStyleInfo style = {}; + style.screenSpaceLineWidth = 1.0f; + style.worldSpaceLineWidth = 0.0f; + style.color = float32_t4(0.619f, 0.325f, 0.709f, 0.5f); + + for (uint32_t i = 0; i < 128u; ++i) + { + std::vector> quadBeziers; + curves::EllipticalArcInfo myCircle; + { + myCircle.majorAxis = { 0.05 , 0.0}; + myCircle.center = { 0.0 + i * 0.1, i * 0.1 }; + myCircle.angleBounds = { + nbl::core::PI() * 0.0, + nbl::core::PI() * 2.0 + }; + myCircle.eccentricity = 1.0; + } + + curves::Subdivision::AddBezierFunc addToBezier = [&](shapes::QuadraticBezier&& info) -> void + { + quadBeziers.push_back(info); + }; + + curves::Subdivision::adaptive(myCircle, 1e-5, addToBezier, 10u); + polyline.addQuadBeziers(quadBeziers); + drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + polyline.clearEverything(); + } + } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); } From 12f3d84898db815ec5f6610daba753a1cd428a03 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 30 Apr 2025 08:46:25 +0400 Subject: [PATCH 060/129] grid dtm tasks --- 62_CAD/DrawResourcesFiller.cpp | 3 +++ 62_CAD/shaders/globals.hlsl | 5 +++++ .../shaders/main_pipeline/fragment_shader.hlsl | 17 +++++++++++++++++ 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 1 + 4 files changed, 26 insertions(+) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 30fb6d748..d12837691 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -281,6 +281,9 @@ void DrawResourcesFiller::drawFontGlyph( } } +// TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo +// We don't have an 
allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) + void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) { auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 045e11f1e..d12c80bef 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -231,6 +231,11 @@ struct ImageObjectInfo uint32_t textureID; // 4 bytes (32) }; +/* +GRID DTM Info similar to `ImageObjectInfo` +other than textureID, there will be dtmSettingsIdx referencing a dtmSettings +*/ + static uint32_t packR11G11B10_UNORM(float32_t3 color) { // Scale and convert to integers diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 3ac219a66..326c4cf0d 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -388,6 +388,23 @@ float4 fragMain(PSInput input) : SV_TARGET localAlpha = colorSample.a; } } + // objType GRID_DTM here + { + // NOTE: create and read from a texture as a last step, you can generate the height values procedurally from a function while you're working on the sdf stuff. + + // Query dtm settings + // use texture Gather to get 4 corners: https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-to-gather + // A. the outlines can be stippled, use phaseshift of the line such that they started from the grid's origin worldspace coordinate + // B. 
the contours are computed for triangles, use the same function as for dtms, choose between the two triangles based on local UV coords in current cell + // Make it so we can choose which diagonal to use to construct the triangle, it's either u=v or u=1-v + // C. Height shading same as contours (split into two triangles) + + // Heights can have invalid values (let's say NaN) if a cell corner has NaN value then no triangle (for contour and shading) and no outline should include that corner. (see DTM image in discord with gaps) + + // TODO: we need to emulate dilation and do sdf of neighbouring cells as well. because contours, outlines and shading can bleed into other cells for AA. + // [NOTE] Do dilation as last step, when everything else works fine + } + uint2 fragCoord = uint2(input.position.xy); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 7ce0f43e7..9d4a384a1 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -620,6 +620,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.setImageUV(uv); outV.setImageTextureId(textureID); } + // TODO: Przemek objType GRID_DTM, Similar transformations to IMAGE // Make the cage fullscreen for testing: #if 0 From 3e1cd3e5496b3fdd56b6ef6f628a332c1e0599b0 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 1 May 2025 14:03:16 +0400 Subject: [PATCH 061/129] Fix and separate custom clip projection --- 62_CAD/DrawResourcesFiller.cpp | 88 ++++++++++++++----- 62_CAD/DrawResourcesFiller.h | 67 +++++++++----- 62_CAD/main.cpp | 65 +++++++++----- 62_CAD/shaders/globals.hlsl | 43 +++++---- .../shaders/main_pipeline/vertex_shader.hlsl | 38 ++++++-- 5 files changed, 206 insertions(+), 95 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index d12837691..759db16f3 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -376,19 +376,33 @@ void 
DrawResourcesFiller::endMainObject() activeMainObjectIndex = InvalidMainObjectIdx; } -void DrawResourcesFiller::pushClipProjectionData(const ClipProjectionData& clipProjectionData) +void DrawResourcesFiller::pushCustomProjection(const float64_t3x3& projection) { - activeClipProjections.push_back(clipProjectionData); - activeClipProjectionIndices.push_back(InvalidClipProjectionIndex); + activeProjections.push_back(projection); + activeProjectionIndices.push_back(InvalidCustomProjectionIndex); } -void DrawResourcesFiller::popClipProjectionData() +void DrawResourcesFiller::popCustomProjection() { - if (activeClipProjections.empty()) + if (activeProjections.empty()) return; - activeClipProjections.pop_back(); - activeClipProjectionIndices.pop_back(); + activeProjections.pop_back(); + activeProjectionIndices.pop_back(); +} + +void DrawResourcesFiller::pushCustomClipRect(const WorldClipRect& clipRect) +{ + activeClipRects.push_back(clipRect); + activeClipRectIndices.push_back(InvalidCustomClipRectIndex); +} + +void DrawResourcesFiller::popCustomClipRect() +{ if (activeClipRects.empty()) + return; + + activeClipRects.pop_back(); + activeClipRectIndices.pop_back(); } bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit) @@ -437,7 +451,8 @@ bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNext copyCPUFilledDrawBuffer(resourcesCollection.lineStyles); copyCPUFilledDrawBuffer(resourcesCollection.dtmSettings); - copyCPUFilledDrawBuffer(resourcesCollection.clipProjections); + copyCPUFilledDrawBuffer(resourcesCollection.customProjections); + copyCPUFilledDrawBuffer(resourcesCollection.customClipRects); copyCPUFilledDrawBuffer(resourcesCollection.mainObjects); copyCPUFilledDrawBuffer(resourcesCollection.drawObjects); copyCPUFilledDrawBuffer(resourcesCollection.indexBuffer); @@ -703,15 +718,26 @@ uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SInte return activeDTMSettingsIndex; } -uint32_t 
DrawResourcesFiller::acquireActiveClipProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) { - if (activeClipProjectionIndices.empty()) - return InvalidClipProjectionIndex; + if (activeProjectionIndices.empty()) + return InvalidCustomProjectionIndex; - if (activeClipProjectionIndices.back() == InvalidClipProjectionIndex) - activeClipProjectionIndices.back() = addClipProjectionData_SubmitIfNeeded(activeClipProjections.back(), intendedNextSubmit); + if (activeProjectionIndices.back() == InvalidCustomProjectionIndex) + activeProjectionIndices.back() = addCustomProjection_SubmitIfNeeded(activeProjections.back(), intendedNextSubmit); - return activeClipProjectionIndices.back(); + return activeProjectionIndices.back(); +} + +uint32_t DrawResourcesFiller::acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeClipRectIndices.empty()) + return InvalidCustomClipRectIndex; + + if (activeClipRectIndices.back() == InvalidCustomClipRectIndex) + activeClipRectIndices.back() = addCustomClipRect_SubmitIfNeeded(activeClipRects.back(), intendedNextSubmit); + + return activeClipRectIndices.back(); } uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) @@ -729,14 +755,16 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten (activeMainObjectType == MainObjectType::HATCH) || (activeMainObjectType == MainObjectType::TEXT); const bool needsDTMSettings = (activeMainObjectType == MainObjectType::DTM); - const bool needsCustomClipProjection = (!activeClipProjectionIndices.empty()); + const bool needsCustomProjection = (!activeProjectionIndices.empty()); + const bool needsCustomClipRect = (!activeClipRectIndices.empty()); const size_t remainingResourcesSize = calculateRemainingResourcesSize(); // making sure 
MainObject and everything it references fits into remaining resources mem size_t memRequired = sizeof(MainObject); if (needsLineStyle) memRequired += sizeof(LineStyle); if (needsDTMSettings) memRequired += sizeof(DTMSettings); - if (needsCustomClipProjection) memRequired += sizeof(ClipProjectionData); + if (needsCustomProjection) memRequired += sizeof(float64_t3x3); + if (needsCustomClipRect) memRequired += sizeof(WorldClipRect); const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? const bool needToOverflowSubmit = (!enoughMem) || (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects); @@ -754,7 +782,8 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten // if something here triggers a auto-submit it's a possible bug with calculating `memRequired` above, TODO: assert that somehow? mainObject.styleIdx = (needsLineStyle) ? acquireActiveLineStyleIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidStyleIdx; mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; - mainObject.clipProjectionIndex = (needsCustomClipProjection) ? acquireActiveClipProjectionIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidClipProjectionIndex; + mainObject.customProjectionIndex = (needsCustomProjection) ? acquireActiveCustomProjectionIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomProjectionIndex; + mainObject.customClipRectIndex = (needsCustomClipRect) ? 
acquireActiveCustomClipRectIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomClipRectIndex; activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); return activeMainObjectIndex; } @@ -793,10 +822,27 @@ uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInf return outDTMSettingIdx; } -uint32_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t memRequired = sizeof(float64_t3x3); + const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? + + if (!enoughMem) + { + finalizeAllCopiesToGPU(intendedNextSubmit); + submitDraws(intendedNextSubmit); + reset(); // resets everything! be careful! + } + + resourcesCollection.customProjections.vector.push_back(projection); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.customProjections.vector.size() - 1u; +} + +uint32_t DrawResourcesFiller::addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit) { const size_t remainingResourcesSize = calculateRemainingResourcesSize(); - const size_t memRequired = sizeof(ClipProjectionData); + const size_t memRequired = sizeof(WorldClipRect); const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? if (!enoughMem) @@ -806,8 +852,8 @@ uint32_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipPro reset(); // resets everything! be careful! 
} - resourcesCollection.clipProjections.vector.push_back(clipProjectionData); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers - return resourcesCollection.clipProjections.vector.size() - 1u; + resourcesCollection.customClipRects.vector.push_back(clipRect); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.customClipRects.vector.size() - 1u; } void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 1e244ae01..b92685959 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -14,9 +14,8 @@ using namespace nbl::asset; using namespace nbl::ext::TextRendering; static_assert(sizeof(DrawObject) == 16u); -static_assert(sizeof(MainObject) == 12u); +static_assert(sizeof(MainObject) == 16u); static_assert(sizeof(LineStyle) == 88u); -static_assert(sizeof(ClipProjectionData) == 88u); // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. 
@@ -92,7 +91,8 @@ struct DrawResourcesFiller // auto-submission level 0 resources (settings that mainObj references) CPUGeneratedResource lineStyles; CPUGeneratedResource dtmSettings; - CPUGeneratedResource clipProjections; + CPUGeneratedResource customProjections; + CPUGeneratedResource customClipRects; // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) CPUGeneratedResource mainObjects; @@ -109,7 +109,8 @@ struct DrawResourcesFiller return lineStyles.getAlignedStorageSize() + dtmSettings.getAlignedStorageSize() + - clipProjections.getAlignedStorageSize() + + customProjections.getAlignedStorageSize() + + customClipRects.getAlignedStorageSize() + mainObjects.getAlignedStorageSize() + drawObjects.getAlignedStorageSize() + indexBuffer.getAlignedStorageSize() + @@ -129,7 +130,7 @@ struct DrawResourcesFiller { // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) - return core::alignUp(sizeof(LineStyle) + sizeof(LineStyle) * DTMSettings::MaxContourSettings + sizeof(DTMSettings) + sizeof(ClipProjectionData) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, ResourcesMaxNaturalAlignment); + return core::alignUp(sizeof(LineStyle) + sizeof(LineStyle) * DTMSettings::MaxContourSettings + sizeof(DTMSettings) + sizeof(WorldClipRect) + sizeof(float64_t3x3) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, ResourcesMaxNaturalAlignment); } void allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size); @@ -207,7 +208,8 @@ struct DrawResourcesFiller { resetDrawObjects(); resetMainObjects(); - resetCustomClipProjections(); + resetCustomProjections(); + resetCustomClipRects(); resetLineStyles(); 
resetDTMSettings(); @@ -231,10 +233,14 @@ struct DrawResourcesFiller void beginMainObject(MainObjectType type); void endMainObject(); - void pushClipProjectionData(const ClipProjectionData& clipProjectionData); - void popClipProjectionData(); + void pushCustomProjection(const float64_t3x3& projection); + void popCustomProjection(); + + void pushCustomClipRect(const WorldClipRect& clipRect); + void popCustomClipRect(); - const std::deque& getClipProjectionStack() const { return activeClipProjections; } + const std::deque& getProjectionStack() const { return activeProjections; } + const std::deque& getClipRectsStack() const { return activeClipRects; } smart_refctd_ptr getMSDFsTextureArray() { return msdfTextureArray; } @@ -317,9 +323,13 @@ struct DrawResourcesFiller // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - // Gets resource index to the active clip projection data from the top of stack + // Gets resource index to the active projection data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active clip data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) - uint32_t acquireActiveClipProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + uint32_t acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); // Gets resource index to the active main object data // If it's been invalidated then it will request to add to resources again ( auto-submission happens 
If there is not enough memory to add again) @@ -331,8 +341,11 @@ struct DrawResourcesFiller /// Attempts to add dtmSettings to resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); - /// Attempts to add clipProjection to resources. If it fails to do, due to resource limitations, auto-submits and tries again. - uint32_t addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit); + /// Attempts to add custom projection to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add custom clip to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit); /// returns index to added LineStyleInfo, returns Invalid index if it exceeds resource limitations uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); @@ -372,13 +385,22 @@ struct DrawResourcesFiller resourcesCollection.geometryInfo.vector.clear(); } - void resetCustomClipProjections() + void resetCustomProjections() { - resourcesCollection.clipProjections.vector.clear(); + resourcesCollection.customProjections.vector.clear(); - // Invalidate all the clip projection addresses because activeClipProjections buffer got reset - for (auto& clipProjAddr : activeClipProjectionIndices) - clipProjAddr = InvalidClipProjectionIndex; + // Invalidate all the clip projection addresses because activeProjections buffer got reset + for (auto& addr : activeProjectionIndices) + addr = InvalidCustomProjectionIndex; + } + + void resetCustomClipRects() + { + 
resourcesCollection.customClipRects.vector.clear(); + + // Invalidate all the clip projection addresses because activeProjections buffer got reset + for (auto& addr : activeClipRectIndices) + addr = InvalidCustomClipRectIndex; } void resetLineStyles() @@ -502,9 +524,12 @@ struct DrawResourcesFiller MainObjectType activeMainObjectType; uint32_t activeMainObjectIndex = InvalidMainObjectIdx; - // The ClipProjections are stack, because user can push/pop ClipProjections in any order - std::deque activeClipProjections; // stack of clip projections stored so we can resubmit them if geometry buffer got reset. - std::deque activeClipProjectionIndices; // stack of clip projection gpu addresses in geometry buffer. to keep track of them in push/pops + // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order + std::deque activeProjections; // stack of projections stored so we can resubmit them if geometry buffer got reset. + std::deque activeProjectionIndices; // stack of projection gpu addresses in geometry buffer. to keep track of them in push/pops + + std::deque activeClipRects; // stack of clips stored so we can resubmit them if geometry buffer got reset. + std::deque activeClipRectIndices; // stack of clips gpu addresses in geometry buffer. 
to keep track of them in push/pops // MSDF GetGlyphMSDFTextureFunc getGlyphMSDF; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index e901d07c3..755a649b2 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -77,7 +77,7 @@ constexpr std::array cameraExtents = 10.0 // CASE_BUG }; -constexpr ExampleMode mode = ExampleMode::CASE_BUG; +constexpr ExampleMode mode = ExampleMode::CASE_6; class Camera2D { @@ -1208,17 +1208,16 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu globalData.pointers = { .lineStyles = baseAddress + resources.lineStyles.bufferOffset, .dtmSettings = baseAddress + resources.dtmSettings.bufferOffset, - .customClipProjections = baseAddress + resources.clipProjections.bufferOffset, + .customProjections = baseAddress + resources.customProjections.bufferOffset, + .customClipRects = baseAddress + resources.customClipRects.bufferOffset, .mainObjects = baseAddress + resources.mainObjects.bufferOffset, .drawObjects = baseAddress + resources.drawObjects.bufferOffset, .geometryBuffer = baseAddress + resources.geometryInfo.bufferOffset, }; globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; - globalData.defaultClipProjection.projectionToNDC = projectionToNDC; - globalData.defaultClipProjection.minClipNDC = float32_t2(-1.0, -1.0); - globalData.defaultClipProjection.maxClipNDC = float32_t2(+1.0, +1.0); - float screenToWorld = getScreenToWorldRatio(globalData.defaultClipProjection.projectionToNDC, globalData.resolution); + globalData.defaultProjectionToNDC = projectionToNDC; + float screenToWorld = getScreenToWorldRatio(globalData.defaultProjectionToNDC, globalData.resolution); globalData.screenToWorldRatio = screenToWorld; globalData.worldToScreenRatio = (1.0f/screenToWorld); globalData.miterLimit = 10.0f; @@ -2715,16 +2714,19 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else 
if (mode == ExampleMode::CASE_6) { + float64_t3x3 customProjection = float64_t3x3{ + 1.0, 0.0, cos(m_timeElapsed * 0.0005) * 100.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0 + }; + // left half of screen should be red and right half should be green - const auto& cameraProj = m_Camera.constructViewProjection(); - ClipProjectionData showLeft = {}; - showLeft.projectionToNDC = cameraProj; - showLeft.minClipNDC = float32_t2(-1.0, -1.0); - showLeft.maxClipNDC = float32_t2(0.0, +1.0); - ClipProjectionData showRight = {}; - showRight.projectionToNDC = cameraProj; - showRight.minClipNDC = float32_t2(0.0, -1.0); - showRight.maxClipNDC = float32_t2(+1.0, +1.0); + WorldClipRect showLeft = {}; + showLeft.minClip = float64_t2(-100.0, -1000.0); + showLeft.maxClip = float64_t2(0.0, +1000.0); + WorldClipRect showRight = {}; + showRight.minClip = float64_t2(0.0, -1000.0); + showRight.maxClip = float64_t2(100.0, +1000.0); LineStyleInfo leftLineStyle = {}; leftLineStyle.screenSpaceLineWidth = 3.0f; @@ -2779,35 +2781,37 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } // we do redundant and nested push/pops to test - drawResourcesFiller.pushClipProjectionData(showLeft); + drawResourcesFiller.pushCustomClipRect(showLeft); { drawResourcesFiller.drawPolyline(polyline1, leftLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showRight); + drawResourcesFiller.pushCustomClipRect(showRight); + drawResourcesFiller.pushCustomProjection(customProjection); { drawResourcesFiller.drawPolyline(polyline1, rightLineStyle, intendedNextSubmit); drawResourcesFiller.drawPolyline(polyline2, rightLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomProjection(); + drawResourcesFiller.popCustomClipRect(); drawResourcesFiller.drawPolyline(polyline2, leftLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showRight); + drawResourcesFiller.pushCustomClipRect(showRight); { 
drawResourcesFiller.drawPolyline(polyline3, rightLineStyle, intendedNextSubmit); drawResourcesFiller.drawPolyline(polyline2, rightLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showLeft); + drawResourcesFiller.pushCustomClipRect(showLeft); { drawResourcesFiller.drawPolyline(polyline1, leftLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); drawResourcesFiller.drawPolyline(polyline2, leftLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); } else if (mode == ExampleMode::CASE_7) @@ -3362,6 +3366,21 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); polyline.clearEverything(); } + + float64_t2 line0[2u] = + { + float64_t2(-1.0, 0.0), + float64_t2(+1.0, 0.0), + }; + float64_t2 line1[2u] = + { + float64_t2(0.0, -1.0), + float64_t2(0.0, +1.0), + }; + + polyline.addLinePoints(line0); + polyline.addLinePoints(line1); + drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index d12c80bef..d3a4968bb 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -38,39 +38,30 @@ struct PushConstants uint32_t isDTMRendering; }; -// TODO: Compute this in a compute shader from the world counterparts -// because this struct includes NDC coordinates, the values will change based camera zoom and move -// of course we could have the clip values to be in world units and also the matrix to transform to world instead of ndc but that requires extra computations(matrix multiplications) per vertex -struct ClipProjectionData -{ - pfloat64_t3x3 projectionToNDC; 
// 72 -> because we use scalar_layout - float32_t2 minClipNDC; // 80 - float32_t2 maxClipNDC; // 88 +struct WorldClipRect +{ + pfloat64_t2 minClip; // min clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) + pfloat64_t2 maxClip; // max clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) }; -#ifndef __HLSL_VERSION -static_assert(offsetof(ClipProjectionData, projectionToNDC) == 0u); -static_assert(offsetof(ClipProjectionData, minClipNDC) == 72u); -static_assert(offsetof(ClipProjectionData, maxClipNDC) == 80u); -#endif - struct Pointers { uint64_t lineStyles; uint64_t dtmSettings; - uint64_t customClipProjections; + uint64_t customProjections; + uint64_t customClipRects; uint64_t mainObjects; uint64_t drawObjects; uint64_t geometryBuffer; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Pointers) == 48u); +static_assert(sizeof(Pointers) == 56u); #endif struct Globals { Pointers pointers; - ClipProjectionData defaultClipProjection; + pfloat64_t3x3 defaultProjectionToNDC; float screenToWorldRatio; float worldToScreenRatio; uint32_t2 resolution; @@ -80,7 +71,7 @@ struct Globals float32_t _padding; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Globals) == 168u); +static_assert(sizeof(Globals) == 160u); #endif #ifdef __HLSL_VERSION @@ -143,11 +134,14 @@ enum class MajorAxis : uint32_t }; // Consists of multiple DrawObjects +// [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound +// [TODO]: pack indices and members of mainObject and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work struct MainObject { uint32_t styleIdx; uint32_t dtmSettingsIdx; - uint32_t clipProjectionIndex; + uint32_t customProjectionIndex; + uint32_t customClipRectIndex; }; struct 
DrawObject @@ -496,7 +490,8 @@ NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; -NBL_CONSTEXPR uint32_t InvalidClipProjectionIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; // Hatches @@ -521,9 +516,13 @@ DTMSettings loadDTMSettings(const uint32_t index) { return vk::RawBufferLoad(globals.pointers.dtmSettings + index * sizeof(DTMSettings), 8u); } -ClipProjectionData loadCustomClipProjection(const uint32_t index) +pfloat64_t3x3 loadCustomProjection(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.customProjections + index * sizeof(pfloat64_t3x3), 8u); +} +WorldClipRect loadCustomClipRect(const uint32_t index) { - return vk::RawBufferLoad(globals.pointers.customClipProjections + index * sizeof(ClipProjectionData), 8u); + return vk::RawBufferLoad(globals.pointers.customClipRects + index * sizeof(WorldClipRect), 8u); } MainObject loadMainObject(const uint32_t index) { diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 9d4a384a1..66101410e 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -23,17 +23,39 @@ float2 QuadraticBezier(float2 p0, float2 p1, float2 p2, float t) return shapes::QuadraticBezier::construct(p0, p1, p2).evaluate(t); } -ClipProjectionData getClipProjectionData(in MainObject mainObj) +struct NDCClipProjectionData { - if (mainObj.clipProjectionIndex != InvalidClipProjectionIndex) + pfloat64_t3x3 projectionToNDC; // 
pre-multiplied projection in a tree + float32_t2 minClipNDC; + float32_t2 maxClipNDC; +}; + +NDCClipProjectionData getClipProjectionData(in MainObject mainObj) +{ + NDCClipProjectionData ret; + if (mainObj.customProjectionIndex != InvalidCustomProjectionIndex) { -#ifdef NBL_2D_SHOWCASE_MODE - return nbl::hlsl::mul(globals.defaultClipProjection.projectionToNDC, loadCustomClipProjection(mainObj.clipProjectionIndex)); -#endif - return loadCustomClipProjection(mainObj.clipProjectionIndex); + // If projection type is worldspace projection and clip: + pfloat64_t3x3 customProjection = loadCustomProjection(mainObj.customProjectionIndex); + ret.projectionToNDC = nbl::hlsl::mul(globals.defaultProjectionToNDC, customProjection); + } + else + ret.projectionToNDC = globals.defaultProjectionToNDC; + + if (mainObj.customClipRectIndex != InvalidCustomClipRectIndex) + { + WorldClipRect worldClipRect = loadCustomClipRect(mainObj.customClipRectIndex); + + /// [NOTE]: Optimization: we avoid looking for min/max in the shader because minClip and maxClip in default worldspace are defined in such a way that minClip.y > maxClip.y so minClipNDC.y < maxClipNDC.y + ret.minClipNDC = nbl::hlsl::_static_cast(transformPointNdc(globals.defaultProjectionToNDC, worldClipRect.minClip)); + ret.maxClipNDC = nbl::hlsl::_static_cast(transformPointNdc(globals.defaultProjectionToNDC, worldClipRect.maxClip)); } else - return globals.defaultClipProjection; + { + ret.minClipNDC = float2(-1.0f, -1.0f); + ret.maxClipNDC = float2(+1.0f, +1.0f); + } + return ret; } float2 transformPointScreenSpace(pfloat64_t3x3 transformation, uint32_t2 resolution, pfloat64_t2 point2d) @@ -83,7 +105,7 @@ void dilateHatch(out float2 outOffsetVec, out float2 outUV, const float2 PSInput main(uint vertexID : SV_VertexID) { - ClipProjectionData clipProjectionData; + NDCClipProjectionData clipProjectionData; PSInput outV; From 6e27153186f8c94b34ea3de1ce03179a799721e0 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 1 May 2025 
14:04:45 +0400 Subject: [PATCH 062/129] small example edit --- 62_CAD/main.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 755a649b2..fc40daccb 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -2720,13 +2720,14 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu 0.0, 0.0, 1.0 }; - // left half of screen should be red and right half should be green + /// [NOTE]: We set minClip and maxClip (in default worldspace) in such a way that minClip.y > maxClip.y so that minClipNDC.y < maxClipNDC.y + // left half should be red and right half should be green WorldClipRect showLeft = {}; - showLeft.minClip = float64_t2(-100.0, -1000.0); - showLeft.maxClip = float64_t2(0.0, +1000.0); + showLeft.minClip = float64_t2(-100.0, +1000.0); + showLeft.maxClip = float64_t2(0.0, -1000.0); WorldClipRect showRight = {}; - showRight.minClip = float64_t2(0.0, -1000.0); - showRight.maxClip = float64_t2(100.0, +1000.0); + showRight.minClip = float64_t2(0.0, +1000.0); + showRight.maxClip = float64_t2(100.0, -1000.0); LineStyleInfo leftLineStyle = {}; leftLineStyle.screenSpaceLineWidth = 3.0f; From 6706df65de1b09513930d8e40e7d2c67d1f3b42c Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 2 May 2025 09:47:59 +0400 Subject: [PATCH 063/129] plus should be drawn, emulated fp64 bug --- 62_CAD/DrawResourcesFiller.h | 4 +- 62_CAD/main.cpp | 50 +++++++++---------- .../shaders/main_pipeline/vertex_shader.hlsl | 9 ++++ 3 files changed, 36 insertions(+), 27 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index b92685959..c6ae52920 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -239,8 +239,8 @@ struct DrawResourcesFiller void pushCustomClipRect(const WorldClipRect& clipRect); void popCustomClipRect(); - const std::deque& getProjectionStack() const { return activeProjections; } - const std::deque& getClipRectsStack() const { 
return activeClipRects; } + const std::deque& getCustomProjectionStack() const { return activeProjections; } + const std::deque& getCustomClipRectsStack() const { return activeClipRects; } smart_refctd_ptr getMSDFsTextureArray() { return msdfTextureArray; } diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index fc40daccb..89938d2b0 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -77,7 +77,7 @@ constexpr std::array cameraExtents = 10.0 // CASE_BUG }; -constexpr ExampleMode mode = ExampleMode::CASE_6; +constexpr ExampleMode mode = ExampleMode::CASE_BUG; class Camera2D { @@ -3343,30 +3343,30 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu style.worldSpaceLineWidth = 0.0f; style.color = float32_t4(0.619f, 0.325f, 0.709f, 0.5f); - for (uint32_t i = 0; i < 128u; ++i) - { - std::vector> quadBeziers; - curves::EllipticalArcInfo myCircle; - { - myCircle.majorAxis = { 0.05 , 0.0}; - myCircle.center = { 0.0 + i * 0.1, i * 0.1 }; - myCircle.angleBounds = { - nbl::core::PI() * 0.0, - nbl::core::PI() * 2.0 - }; - myCircle.eccentricity = 1.0; - } - - curves::Subdivision::AddBezierFunc addToBezier = [&](shapes::QuadraticBezier&& info) -> void - { - quadBeziers.push_back(info); - }; - - curves::Subdivision::adaptive(myCircle, 1e-5, addToBezier, 10u); - polyline.addQuadBeziers(quadBeziers); - drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); - polyline.clearEverything(); - } + //for (uint32_t i = 0; i < 128u; ++i) + //{ + // std::vector> quadBeziers; + // curves::EllipticalArcInfo myCircle; + // { + // myCircle.majorAxis = { 0.05 , 0.0}; + // myCircle.center = { 0.0 + i * 0.1, i * 0.1 }; + // myCircle.angleBounds = { + // nbl::core::PI() * 0.0, + // nbl::core::PI() * 2.0 + // }; + // myCircle.eccentricity = 1.0; + // } + + // curves::Subdivision::AddBezierFunc addToBezier = [&](shapes::QuadraticBezier&& info) -> void + // { + // quadBeziers.push_back(info); + // }; + + // curves::Subdivision::adaptive(myCircle, 1e-5, 
addToBezier, 10u); + // polyline.addQuadBeziers(quadBeziers); + // drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + // polyline.clearEverything(); + //} float64_t2 line0[2u] = { diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 66101410e..c979f3b0b 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -32,9 +32,15 @@ struct NDCClipProjectionData NDCClipProjectionData getClipProjectionData(in MainObject mainObj) { + pfloat64_t3x3 weirdProjection = nbl::hlsl::_static_cast( + float32_t3x3(1.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 1.0f)); + NDCClipProjectionData ret; if (mainObj.customProjectionIndex != InvalidCustomProjectionIndex) { + // If projection type is worldspace projection and clip: pfloat64_t3x3 customProjection = loadCustomProjection(mainObj.customProjectionIndex); ret.projectionToNDC = nbl::hlsl::mul(globals.defaultProjectionToNDC, customProjection); @@ -55,6 +61,9 @@ NDCClipProjectionData getClipProjectionData(in MainObject mainObj) ret.minClipNDC = float2(-1.0f, -1.0f); ret.maxClipNDC = float2(+1.0f, +1.0f); } + + ret.projectionToNDC = nbl::hlsl::mul(ret.projectionToNDC, weirdProjection); + return ret; } From c9d2abf21fb9c4af6407b53a4e7025be5ed28d8e Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 2 May 2025 11:19:31 +0400 Subject: [PATCH 064/129] Fixed Geometry and Fixed Screenspace sized polylines handle --- 62_CAD/DrawResourcesFiller.cpp | 32 +++++++++++++++++-- 62_CAD/DrawResourcesFiller.h | 11 +++++-- 62_CAD/shaders/globals.hlsl | 14 ++++++-- .../shaders/main_pipeline/vertex_shader.hlsl | 9 ++---- 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 759db16f3..b2f4e4950 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -87,11 +87,36 @@ void 
DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line setActiveLineStyle(lineStyleInfo); - beginMainObject(MainObjectType::POLYLINE); + beginMainObject(MainObjectType::POLYLINE, TransformationType::NORMAL); drawPolyline(polyline, intendedNextSubmit); endMainObject(); } +void DrawResourcesFiller::drawFixedGeometryPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!lineStyleInfo.isVisible()) + return; + + setActiveLineStyle(lineStyleInfo); + + if (!activeProjections.empty()) + { + // if there is already an active custom projection, it should be considered into the transformation of the fixed geometry polyline + float64_t3x3 newTransformation = nbl::hlsl::mul(activeProjections.back(), transformation); + pushCustomProjection(newTransformation); + } + else + { + // will be multiplied by the default projection matrix from the left (in shader), no need to consider it here + pushCustomProjection(transformation); + } + + beginMainObject(MainObjectType::POLYLINE, TransformationType::FIXED_SCREENSPACE_SIZE); + drawPolyline(polyline, intendedNextSubmit); + endMainObject(); + popCustomProjection(); +} + void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit) { uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); @@ -364,15 +389,17 @@ void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSetting activeDTMSettingsIndex = InvalidDTMSettingsIdx; } -void DrawResourcesFiller::beginMainObject(MainObjectType type) +void DrawResourcesFiller::beginMainObject(MainObjectType type, TransformationType transformationType) { activeMainObjectType = type; + activeMainObjectTransformationType = transformationType; activeMainObjectIndex = InvalidMainObjectIdx; } void DrawResourcesFiller::endMainObject() { 
activeMainObjectType = MainObjectType::NONE; + activeMainObjectTransformationType = TransformationType::NORMAL; activeMainObjectIndex = InvalidMainObjectIdx; } @@ -784,6 +811,7 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; mainObject.customProjectionIndex = (needsCustomProjection) ? acquireActiveCustomProjectionIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomProjectionIndex; mainObject.customClipRectIndex = (needsCustomClipRect) ? acquireActiveCustomClipRectIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomClipRectIndex; + mainObject.transformationType = (uint32_t)activeMainObjectTransformationType; activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); return activeMainObjectIndex; } diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index c6ae52920..d64e5c0af 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -14,7 +14,7 @@ using namespace nbl::asset; using namespace nbl::ext::TextRendering; static_assert(sizeof(DrawObject) == 16u); -static_assert(sizeof(MainObject) == 16u); +static_assert(sizeof(MainObject) == 20u); static_assert(sizeof(LineStyle) == 88u); // ! DrawResourcesFiller @@ -149,9 +149,15 @@ struct DrawResourcesFiller // take a `SIntendedSubmitInfo` like others, but don't use it as I don't want you to handle anything regarding autoSubmit // somehow retrieve or calculate the geometry buffer offsets of your vertex and index buffer to be used outside for binding purposes + //! this function fills buffers required for drawing a polyline and submits a draw through provided callback when there is not enough memory. void drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit); + + //! 
Draws a fixed-geometry polyline using a custom transformation. + //! TODO: Change `polyline` input to an ID referencing a possibly cached instance in our buffers, allowing reuse and avoiding redundant uploads. + void drawFixedGeometryPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); + /// Use this in a begin/endMainObject scope when you want to draw different polylines that should essentially be a single main object (no self-blending between components of a single main object) /// WARNING: make sure this function is called within begin/endMainObject scope void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); @@ -230,7 +236,7 @@ struct DrawResourcesFiller void setActiveLineStyle(const LineStyleInfo& lineStyle); void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo); - void beginMainObject(MainObjectType type); + void beginMainObject(MainObjectType type, TransformationType transformationType = TransformationType::NORMAL); void endMainObject(); void pushCustomProjection(const float64_t3x3& projection); @@ -522,6 +528,7 @@ struct DrawResourcesFiller uint32_t activeDTMSettingsIndex = InvalidDTMSettingsIdx; MainObjectType activeMainObjectType; + TransformationType activeMainObjectTransformationType; uint32_t activeMainObjectIndex = InvalidMainObjectIdx; // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index d3a4968bb..e5fe21e03 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -1,7 +1,7 @@ #ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ #define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -#define NBL_FORCE_EMULATED_FLOAT_64 +// #define NBL_FORCE_EMULATED_FLOAT_64 #include #include @@ -62,6 +62,7 @@ struct Globals { Pointers pointers; 
pfloat64_t3x3 defaultProjectionToNDC; + pfloat64_t3x3 screenToWorldScaleTransform; // Pre-multiply your transform with this to scale in screen space (e.g., scale 100.0 means 100 screen pixels). float screenToWorldRatio; float worldToScreenRatio; uint32_t2 resolution; @@ -71,7 +72,7 @@ struct Globals float32_t _padding; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Globals) == 160u); +static_assert(sizeof(Globals) == 232u); #endif #ifdef __HLSL_VERSION @@ -133,6 +134,13 @@ enum class MajorAxis : uint32_t MAJOR_Y = 1u, }; +enum TransformationType +{ + NORMAL = 0, + FIXED_SCREENSPACE_SIZE +}; + + // Consists of multiple DrawObjects // [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound // [TODO]: pack indices and members of mainObject and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work @@ -142,6 +150,7 @@ struct MainObject uint32_t dtmSettingsIdx; uint32_t customProjectionIndex; uint32_t customClipRectIndex; + uint32_t transformationType; // todo pack later, it's just 2 possible values atm }; struct DrawObject @@ -151,7 +160,6 @@ struct DrawObject uint64_t geometryAddress; }; - // Goes into geometry buffer, needs to be aligned by 8 struct LinePointInfo { diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index c979f3b0b..a25426b95 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -32,15 +32,9 @@ struct NDCClipProjectionData NDCClipProjectionData getClipProjectionData(in MainObject mainObj) { - pfloat64_t3x3 weirdProjection = nbl::hlsl::_static_cast( - float32_t3x3(1.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 1.0f)); - NDCClipProjectionData ret; if (mainObj.customProjectionIndex != 
InvalidCustomProjectionIndex) { - // If projection type is worldspace projection and clip: pfloat64_t3x3 customProjection = loadCustomProjection(mainObj.customProjectionIndex); ret.projectionToNDC = nbl::hlsl::mul(globals.defaultProjectionToNDC, customProjection); @@ -62,7 +56,8 @@ NDCClipProjectionData getClipProjectionData(in MainObject mainObj) ret.maxClipNDC = float2(+1.0f, +1.0f); } - ret.projectionToNDC = nbl::hlsl::mul(ret.projectionToNDC, weirdProjection); + if (mainObj.transformationType == TransformationType::FIXED_SCREENSPACE_SIZE) + ret.projectionToNDC = nbl::hlsl::mul(ret.projectionToNDC, globals.screenToWorldScaleTransform); return ret; } From 7381460e1c9a5b0bc1a8f319c0f81b0d577e97d8 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 2 May 2025 11:19:59 +0400 Subject: [PATCH 065/129] drawFixedGeometryPolyline add case to example --- 62_CAD/main.cpp | 105 +++++++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 37 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 89938d2b0..04d99a9cf 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -58,7 +58,7 @@ enum class ExampleMode CASE_7, // Images CASE_8, // MSDF and Text CASE_9, // DTM - CASE_BUG, // Bug Repro + CASE_BUG, // Bug Repro, after fix, rename to CASE_10 and comment should be: testing fixed geometry and emulated fp64 corner cases CASE_COUNT }; @@ -1220,6 +1220,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu float screenToWorld = getScreenToWorldRatio(globalData.defaultProjectionToNDC, globalData.resolution); globalData.screenToWorldRatio = screenToWorld; globalData.worldToScreenRatio = (1.0f/screenToWorld); + globalData.screenToWorldScaleTransform = float64_t3x3(globalData.worldToScreenRatio, 0.0f, 0.0f, + 0.0f, globalData.worldToScreenRatio, 0.0f, + 0.0f, 0.0f, 1.0f); globalData.miterLimit = 10.0f; globalData.currentlyActiveMainObjectIndex = drawResourcesFiller.getActiveMainObjectIndex(); SBufferRange 
globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; @@ -3343,45 +3346,73 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu style.worldSpaceLineWidth = 0.0f; style.color = float32_t4(0.619f, 0.325f, 0.709f, 0.5f); - //for (uint32_t i = 0; i < 128u; ++i) - //{ - // std::vector> quadBeziers; - // curves::EllipticalArcInfo myCircle; - // { - // myCircle.majorAxis = { 0.05 , 0.0}; - // myCircle.center = { 0.0 + i * 0.1, i * 0.1 }; - // myCircle.angleBounds = { - // nbl::core::PI() * 0.0, - // nbl::core::PI() * 2.0 - // }; - // myCircle.eccentricity = 1.0; - // } - - // curves::Subdivision::AddBezierFunc addToBezier = [&](shapes::QuadraticBezier&& info) -> void - // { - // quadBeziers.push_back(info); - // }; - - // curves::Subdivision::adaptive(myCircle, 1e-5, addToBezier, 10u); - // polyline.addQuadBeziers(quadBeziers); - // drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); - // polyline.clearEverything(); - //} - - float64_t2 line0[2u] = + for (uint32_t i = 0; i < 128u; ++i) { - float64_t2(-1.0, 0.0), - float64_t2(+1.0, 0.0), - }; - float64_t2 line1[2u] = + std::vector> quadBeziers; + curves::EllipticalArcInfo myCircle; + { + myCircle.majorAxis = { 0.05 , 0.0}; + myCircle.center = { 0.0 + i * 0.1, i * 0.1 }; + myCircle.angleBounds = { + nbl::core::PI() * 0.0, + nbl::core::PI() * 2.0 + }; + myCircle.eccentricity = 1.0; + } + + curves::Subdivision::AddBezierFunc addToBezier = [&](shapes::QuadraticBezier&& info) -> void + { + quadBeziers.push_back(info); + }; + + curves::Subdivision::adaptive(myCircle, 1e-5, addToBezier, 10u); + polyline.addQuadBeziers(quadBeziers); + drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + polyline.clearEverything(); + } + + // Testing Fixed Geometry { - float64_t2(0.0, -1.0), - float64_t2(0.0, +1.0), - }; + float64_t2 line0[2u] = + { + float64_t2(-1.0, 0.0), + float64_t2(+1.0, 0.0), + }; + float64_t2 line1[2u] = + 
{ + float64_t2(0.0, -1.0), + float64_t2(0.0, +1.0), + }; - polyline.addLinePoints(line0); - polyline.addLinePoints(line1); - drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + float64_t3x3 translateMat = + { + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t angle = m_timeElapsed * 0.001; + float64_t2 dir = float64_t2{ cos(angle), sin(angle) }; + float64_t3x3 rotateMat = + { + dir.x, -dir.y, 0.0, + dir.y, dir.x, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t2 scale = float64_t2{ 10.0, 10.0 }; + float64_t3x3 scaleMat = + { + scale.x, 0.0, 0.0, + 0.0, scale.y, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t3x3 transformation = nbl::hlsl::mul(translateMat, nbl::hlsl::mul(rotateMat, scaleMat)); + polyline.addLinePoints(line0); + polyline.addLinePoints(line1); + drawResourcesFiller.drawFixedGeometryPolyline(polyline, style, transformation, TransformationType::FIXED_SCREENSPACE_SIZE, intendedNextSubmit); + } } drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); From 1db6e0461f06c9316ae4181d25e9e42e97c34707 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 2 May 2025 13:04:19 +0400 Subject: [PATCH 066/129] small fixes on transformation type --- 62_CAD/DrawResourcesFiller.cpp | 6 +++--- 62_CAD/DrawResourcesFiller.h | 2 +- 62_CAD/shaders/globals.hlsl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index b2f4e4950..55273873e 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -87,7 +87,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line setActiveLineStyle(lineStyleInfo); - beginMainObject(MainObjectType::POLYLINE, TransformationType::NORMAL); + beginMainObject(MainObjectType::POLYLINE, TransformationType::TT_NORMAL); drawPolyline(polyline, intendedNextSubmit); endMainObject(); } @@ -111,7 +111,7 @@ void DrawResourcesFiller::drawFixedGeometryPolyline(const CPolylineBase& polylin 
pushCustomProjection(transformation); } - beginMainObject(MainObjectType::POLYLINE, TransformationType::FIXED_SCREENSPACE_SIZE); + beginMainObject(MainObjectType::POLYLINE, transformationType); drawPolyline(polyline, intendedNextSubmit); endMainObject(); popCustomProjection(); @@ -399,7 +399,7 @@ void DrawResourcesFiller::beginMainObject(MainObjectType type, TransformationTyp void DrawResourcesFiller::endMainObject() { activeMainObjectType = MainObjectType::NONE; - activeMainObjectTransformationType = TransformationType::NORMAL; + activeMainObjectTransformationType = TransformationType::TT_NORMAL; activeMainObjectIndex = InvalidMainObjectIdx; } diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index d64e5c0af..049299974 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -236,7 +236,7 @@ struct DrawResourcesFiller void setActiveLineStyle(const LineStyleInfo& lineStyle); void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo); - void beginMainObject(MainObjectType type, TransformationType transformationType = TransformationType::NORMAL); + void beginMainObject(MainObjectType type, TransformationType transformationType = TransformationType::TT_NORMAL); void endMainObject(); void pushCustomProjection(const float64_t3x3& projection); diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index e5fe21e03..a71c920a6 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -136,8 +136,8 @@ enum class MajorAxis : uint32_t enum TransformationType { - NORMAL = 0, - FIXED_SCREENSPACE_SIZE + TT_NORMAL = 0, + TT_FIXED_SCREENSPACE_SIZE }; From 7521f571d80aee25bb88f6ee30e416e709f60d74 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 2 May 2025 13:05:07 +0400 Subject: [PATCH 067/129] fix vtx shader typo --- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl 
b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index a25426b95..478ad964f 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -56,7 +56,7 @@ NDCClipProjectionData getClipProjectionData(in MainObject mainObj) ret.maxClipNDC = float2(+1.0f, +1.0f); } - if (mainObj.transformationType == TransformationType::FIXED_SCREENSPACE_SIZE) + if (mainObj.transformationType == TransformationType::TT_FIXED_SCREENSPACE_SIZE) ret.projectionToNDC = nbl::hlsl::mul(ret.projectionToNDC, globals.screenToWorldScaleTransform); return ret; From 7ab6f5d7fbe22841da07b3fcf03a235e5b04e1e3 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sat, 3 May 2025 10:44:30 +0400 Subject: [PATCH 068/129] small example edit --- 62_CAD/main.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 04d99a9cf..9ab67ffe2 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -77,7 +77,7 @@ constexpr std::array cameraExtents = 10.0 // CASE_BUG }; -constexpr ExampleMode mode = ExampleMode::CASE_BUG; +constexpr ExampleMode mode = ExampleMode::CASE_2; class Camera2D { @@ -3342,8 +3342,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu CPolyline polyline; LineStyleInfo style = {}; - style.screenSpaceLineWidth = 1.0f; - style.worldSpaceLineWidth = 0.0f; + style.screenSpaceLineWidth = 4.0f; style.color = float32_t4(0.619f, 0.325f, 0.709f, 0.5f); for (uint32_t i = 0; i < 128u; ++i) @@ -3367,7 +3366,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu curves::Subdivision::adaptive(myCircle, 1e-5, addToBezier, 10u); polyline.addQuadBeziers(quadBeziers); - drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + // drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); polyline.clearEverything(); } @@ -3378,10 +3377,11 @@ class ComputerAidedDesign final : public 
examples::SimpleWindowedApplication, pu float64_t2(-1.0, 0.0), float64_t2(+1.0, 0.0), }; - float64_t2 line1[2u] = + float64_t2 line1[3u] = { float64_t2(0.0, -1.0), float64_t2(0.0, +1.0), + float64_t2(+1.0, +1.0), }; float64_t3x3 translateMat = @@ -3390,7 +3390,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu 0.0, 1.0, 0.0, 0.0, 0.0, 1.0 }; - + float64_t angle = m_timeElapsed * 0.001; float64_t2 dir = float64_t2{ cos(angle), sin(angle) }; float64_t3x3 rotateMat = @@ -3400,7 +3400,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu 0.0, 0.0, 1.0 }; - float64_t2 scale = float64_t2{ 10.0, 10.0 }; + float64_t2 scale = float64_t2{ 100.0, 100.0 }; float64_t3x3 scaleMat = { scale.x, 0.0, 0.0, @@ -3411,7 +3411,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu float64_t3x3 transformation = nbl::hlsl::mul(translateMat, nbl::hlsl::mul(rotateMat, scaleMat)); polyline.addLinePoints(line0); polyline.addLinePoints(line1); - drawResourcesFiller.drawFixedGeometryPolyline(polyline, style, transformation, TransformationType::FIXED_SCREENSPACE_SIZE, intendedNextSubmit); + polyline.preprocessPolylineWithStyle(style); + // drawResourcesFiller.drawPolyline(polyline, intendedNextSubmit); + drawResourcesFiller.drawFixedGeometryPolyline(polyline, style, transformation, TransformationType::TT_FIXED_SCREENSPACE_SIZE, intendedNextSubmit); } } From f88f6b6a0710c052bac6b1d5bedbd2af9842af53 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sun, 4 May 2025 17:40:00 +0400 Subject: [PATCH 069/129] Preparation work for backing up and restoring draw resources cache (for view-only GPU mode in n4ce) --- 62_CAD/DrawResourcesFiller.cpp | 88 +++++++++++++++++----------------- 62_CAD/DrawResourcesFiller.h | 74 ++++++++++++++++++---------- 62_CAD/shaders/globals.hlsl | 1 + 3 files changed, 92 insertions(+), 71 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 
55273873e..7bfb92cea 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -17,6 +17,7 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { + // TODO: Make this function failable and report insufficient memory if less that getMinimumRequiredResourcesBufferSize, TODO: Have retry mechanism to allocate less mem size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); // size = 368u; STRESS TEST @@ -33,9 +34,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) { - msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); - msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); - + // TODO: Make this function failable and report insufficient memory asset::E_FORMAT msdfFormat = MSDFTextureFormat; asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; assert(maxMSDFs <= logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers); @@ -78,6 +77,10 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui msdfTextureArray = logicalDevice->createImageView(std::move(imgViewInfo)); } + + msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); + msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); + msdfStagedCPUImages.resize(maxMSDFs); } void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit) @@ -373,7 +376,7 @@ bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNe bool success = true; flushDrawObjects(); success &= 
finalizeBufferCopies(intendedNextSubmit); - success &= finalizeTextureCopies(intendedNextSubmit); + success &= finalizeMSDFImagesCopies(intendedNextSubmit); return success; } @@ -488,13 +491,8 @@ bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNext return true; } -bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::finalizeMSDFImagesCopies(SIntendedSubmitInfo& intendedNextSubmit) { - msdfTextureArrayIndicesUsed.clear(); // clear msdf textures used in the frame, because the frame finished and called this function. - - if (!msdfTextureCopies.size() && m_hasInitializedMSDFTextureArrays) // even if the textureCopies are empty, we want to continue if not initialized yet so that the layout of all layers become READ_ONLY_OPTIMAL - return true; // yay successfully copied nothing - auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); if (cmdBuffInfo) @@ -533,21 +531,20 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex // Do the copies and advance the iterator. // this is the pattern we use for iterating when entries will get erased if processed successfully, but may get skipped for later. 
- auto oit = msdfTextureCopies.begin(); - for (auto iit = msdfTextureCopies.begin(); iit != msdfTextureCopies.end(); iit++) + for (uint32_t i = 0u; i < msdfStagedCPUImages.size(); ++i) { - bool copySuccess = true; - if (iit->image && iit->index < msdfImage->getCreationParameters().arrayLayers) + auto& stagedMSDF = msdfStagedCPUImages[i]; + if (stagedMSDF.image && i < msdfImage->getCreationParameters().arrayLayers) { - for (uint32_t mip = 0; mip < iit->image->getCreationParameters().mipLevels; mip++) + for (uint32_t mip = 0; mip < stagedMSDF.image->getCreationParameters().mipLevels; mip++) { - auto mipImageRegion = iit->image->getRegion(mip, core::vectorSIMDu32(0u, 0u)); + auto mipImageRegion = stagedMSDF.image->getRegion(mip, core::vectorSIMDu32(0u, 0u)); if (mipImageRegion) { asset::IImage::SBufferCopy region = {}; region.imageSubresource.aspectMask = asset::IImage::EAF_COLOR_BIT; region.imageSubresource.mipLevel = mipImageRegion->imageSubresource.mipLevel; - region.imageSubresource.baseArrayLayer = iit->index; + region.imageSubresource.baseArrayLayer = i; region.imageSubresource.layerCount = 1u; region.bufferOffset = 0u; region.bufferRowLength = mipImageRegion->getExtent().width; @@ -555,46 +552,30 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex region.imageExtent = mipImageRegion->imageExtent; region.imageOffset = { 0u, 0u, 0u }; - auto buffer = reinterpret_cast(iit->image->getBuffer()->getPointer()); + auto buffer = reinterpret_cast(stagedMSDF.image->getBuffer()->getPointer()); auto bufferOffset = mipImageRegion->bufferOffset; - if (!m_utilities->updateImageViaStagingBuffer( + stagedMSDF.uploadedToGPU = m_utilities->updateImageViaStagingBuffer( intendedNextSubmit, buffer + bufferOffset, nbl::ext::TextRendering::TextRenderer::MSDFTextureFormat, msdfImage.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - { ®ion, ®ion + 1 })) - { - // TODO: Log which mip failed - copySuccess = false; - } + { ®ion, ®ion + 1 }); } else { - // TODO: 
Log - copySuccess = false; + assert(false); + stagedMSDF.uploadedToGPU = false; } } } else { assert(false); - copySuccess = false; - } - - if (!copySuccess) - { - // we move the failed copy to the oit and advance it - if (oit != iit) - *oit = *iit; - oit++; + stagedMSDF.uploadedToGPU = false; } } - // trim - const auto newSize = std::distance(msdfTextureCopies.begin(), oit); - _NBL_DEBUG_BREAK_IF(newSize != 0u); // we had failed copies - msdfTextureCopies.resize(newSize); // preparing msdfs for use image_barrier_t afterTransferImageBarrier[] = @@ -1169,12 +1150,12 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor // TextureReferences hold the semaValue related to the "scratch semaphore" in IntendedSubmitInfo // Every single submit increases this value by 1 - // The reason for hiolding on to the lastUsedSema is deferred dealloc, which we call in the case of eviction, making sure we get rid of the entry inside the allocator only when the texture is done being used + // The reason for holding on to the lastUsedSema is deferred dealloc, which we call in the case of eviction, making sure we get rid of the entry inside the allocator only when the texture is done being used const auto nextSemaSignal = intendedNextSubmit.getFutureScratchSemaphore(); auto evictionCallback = [&](const MSDFReference& evicted) { - if (msdfTextureArrayIndicesUsed.contains(evicted.alloc_idx)) + if (msdfStagedCPUImages[evicted.alloc_idx].usedThisFrame) { // Dealloc once submission is finished msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, nextSemaSignal); @@ -1187,6 +1168,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor // We didn't use it this frame, so it's safe to dealloc now, withou needing to "overflow" submit msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx); } + msdfStagedCPUImages[evicted.alloc_idx].evict(); }; // We pass nextSemaValue instead of constructing a new 
MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema @@ -1201,8 +1183,9 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor if (inserted->alloc_idx != IndexAllocator::AddressAllocator::invalid_address) { - // We queue copy and finalize all on `finalizeTextureCopies` function called before draw calls to make sure it's in mem - msdfTextureCopies.push_back({ .image = std::move(cpuImage), .index = inserted->alloc_idx }); + // We stage copy, finalizeMSDFImagesCopies will push it into GPU + msdfStagedCPUImages[inserted->alloc_idx].image = std::move(cpuImage); + msdfStagedCPUImages[inserted->alloc_idx].uploadedToGPU = false; } else { @@ -1213,7 +1196,22 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor assert(inserted->alloc_idx != InvalidTextureIdx); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed if (inserted->alloc_idx != InvalidTextureIdx) - msdfTextureArrayIndicesUsed.emplace(inserted->alloc_idx); + { + msdfStagedCPUImages[inserted->alloc_idx].usedThisFrame = true; + } return inserted->alloc_idx; -} \ No newline at end of file +} + +void DrawResourcesFiller::flushDrawObjects() +{ + if (resourcesCollection.drawObjects.getCount() > drawObjectsFlushedToDrawCalls) + { + DrawCallData drawCall = {}; + drawCall.isDTMRendering = false; + drawCall.drawObj.drawObjectStart = drawObjectsFlushedToDrawCalls; + drawCall.drawObj.drawObjectCount = resourcesCollection.drawObjects.getCount() - drawObjectsFlushedToDrawCalls; + drawCalls.push_back(drawCall); + drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); + } +} diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 049299974..fb4f2c97a 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -218,6 +218,7 @@ struct 
DrawResourcesFiller resetCustomClipRects(); resetLineStyles(); resetDTMSettings(); + resetMSDFsUsageState(); drawObjectsFlushedToDrawCalls = 0ull; drawCalls.clear(); @@ -261,7 +262,7 @@ struct DrawResourcesFiller /// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj uint32_t getActiveMainObjectIndex() const { return activeMainObjectIndex; } - // TODO: Remove these later, these are for multiple draw calls instead of a single one. + // NOTE: Most probably Going to get removed soon with a single draw call in GPU-driven rendering struct DrawCallData { union @@ -282,37 +283,29 @@ struct DrawResourcesFiller bool isDTMRendering; }; - uint64_t drawObjectsFlushedToDrawCalls = 0ull; + const std::vector& getDrawCalls() const { return drawCalls; } - void flushDrawObjects() + // ! This is all the textures and buffers that were staged on CPU and eventually copied to GPU in a single submit + // ! This data is prepped and ready to be consumed by GPU with no further transformations applied on the data. + // ! You can back this up, and replay your scene without having to traverse your scene and do AddXXX, DrawXXX all over again. 
+ struct DrawResourcesCache { - if (resourcesCollection.drawObjects.getCount() > drawObjectsFlushedToDrawCalls) - { - DrawCallData drawCall = {}; - drawCall.isDTMRendering = false; - drawCall.drawObj.drawObjectStart = drawObjectsFlushedToDrawCalls; - drawCall.drawObj.drawObjectCount = resourcesCollection.drawObjects.getCount() - drawObjectsFlushedToDrawCalls; - drawCalls.push_back(drawCall); - drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); - } - } - - std::vector drawCalls; // either dtms or objects + // TODO: Resources Colletion + // TODO: MSDFs Staging Cache + // TODO: Draw Calls Data + // TODO: Get total memory consumption + }; + // TODO: Backup which gives DrawResourcesCache + // TODO: Restore which gets DrawResourcesCache protected: - - struct MSDFTextureCopy - { - core::smart_refctd_ptr image; - uint32_t index; - }; SubmitFunc submitDraws; bool finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit); - bool finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit); + bool finalizeMSDFImagesCopies(SIntendedSubmitInfo& intendedNextSubmit); const size_t calculateRemainingResourcesSize() const; @@ -420,6 +413,12 @@ struct DrawResourcesFiller resourcesCollection.dtmSettings.vector.clear(); activeDTMSettingsIndex = InvalidDTMSettingsIdx; } + + void resetMSDFsUsageState() + { + for (auto& stagedMSDF : msdfStagedCPUImages) + stagedMSDF.usedThisFrame = false; + } // MSDF Hashing and Caching Internal Functions enum class MSDFType : uint8_t @@ -511,6 +510,13 @@ struct DrawResourcesFiller // If you haven't created a mainObject yet, then pass InvalidMainObjectIdx uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + // Flushes Current Draw Call and adds to drawCalls + void flushDrawObjects(); + + // DrawCalls Data + uint64_t drawObjectsFlushedToDrawCalls = 0ull; + std::vector drawCalls; // either dtms or objects + // ResourcesCollection and packed into 
GPUBuffer ResourcesCollection resourcesCollection; nbl::core::smart_refctd_ptr resourcesGPUBuffer; @@ -529,8 +535,8 @@ struct DrawResourcesFiller MainObjectType activeMainObjectType; TransformationType activeMainObjectTransformationType; - uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order std::deque activeProjections; // stack of projections stored so we can resubmit them if geometry buffer got reset. std::deque activeProjectionIndices; // stack of projection gpu addresses in geometry buffer. to keep track of them in push/pops @@ -538,16 +544,32 @@ struct DrawResourcesFiller std::deque activeClipRects; // stack of clips stored so we can resubmit them if geometry buffer got reset. std::deque activeClipRectIndices; // stack of clips gpu addresses in geometry buffer. to keep track of them in push/pops - // MSDF + struct MSDFStagedCPUImage + { + core::smart_refctd_ptr image; + bool uploadedToGPU : 1u; + // TODO: Use frame counter instead, generalize struct to all textures probably, DONT try to abuse scratchSema.nextSignal as frame tracker, because there can be "cached" draws where no submits happen. 
+ bool usedThisFrame : 1u; + + bool isValid() const { return image.get() != nullptr; } + void evict() + { + image = nullptr; + uploadedToGPU = false; + usedThisFrame = false; + } + }; + GetGlyphMSDFTextureFunc getGlyphMSDF; GetHatchFillPatternMSDFTextureFunc getHatchFillPatternMSDF; using MSDFsLRUCache = core::LRUCache; smart_refctd_ptr msdfTextureArray; // view to the resource holding all the msdfs in it's layers smart_refctd_ptr msdfTextureArrayIndexAllocator; - std::set msdfTextureArrayIndicesUsed = {}; // indices in the msdf texture array allocator that have been used in the current frame // TODO: make this a dynamic bitset - std::vector msdfTextureCopies = {}; // queued up texture copies std::unique_ptr msdfLRUCache; // LRU Cache to evict Least Recently Used in case of overflow + + // TODO: Maybe move this to Resources Collection? + std::vector msdfStagedCPUImages = {}; // cached cpu imaged + their status, size equals to LRUCache size static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; bool m_hasInitializedMSDFTextureArrays = false; diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index a71c920a6..69346ee14 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -501,6 +501,7 @@ NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidMSDFImageIdx = nbl::hlsl::numeric_limits::max; // Hatches NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; From 4ec49382bee48a8cf5b1d5f5916a40c350c354e3 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 5 May 2025 15:07:50 +0400 Subject: [PATCH 070/129] CacheAndReplay Feature --- 62_CAD/DrawResourcesFiller.cpp | 109 ++++++++++++++++++++++------- 
62_CAD/DrawResourcesFiller.h | 108 ++++++++++++++++++----------- 62_CAD/main.cpp | 121 +++++++++++++++++++++------------ 3 files changed, 230 insertions(+), 108 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 7bfb92cea..c83055f0e 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -371,15 +371,39 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 endMainObject(); } -bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) { + if (!intendedNextSubmit.valid()) + { + // It is a caching submit without command buffer, just for the purpose of accumulation of staging resources + // In that case we don't push any uploads (i.e. we don't record any copy commmand in active command buffer, because there is no active command buffer) + return false; + } + bool success = true; - flushDrawObjects(); - success &= finalizeBufferCopies(intendedNextSubmit); - success &= finalizeMSDFImagesCopies(intendedNextSubmit); + if (currentReplayCache) + { + // This means we're in a replay cache scope, use the replay cache to push to GPU instead of internal accumulation + success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); + success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfStagedCPUImages); + } + else + { + flushDrawObjects(); + success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); + success &= pushMSDFImagesUploads(intendedNextSubmit, msdfStagedCPUImages); + } return success; } +const DrawResourcesFiller::ResourcesCollection& DrawResourcesFiller::getResourcesCollection() const +{ + if (currentReplayCache) + return currentReplayCache->resourcesCollection; + else + return resourcesCollection; +} + void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) { activeLineStyle 
= lineStyle; @@ -435,7 +459,50 @@ void DrawResourcesFiller::popCustomClipRect() activeClipRectIndices.pop_back(); } -bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit) +/// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj +uint32_t DrawResourcesFiller::getActiveMainObjectIndex() const +{ + if (currentReplayCache) + return currentReplayCache->activeMainObjectIndex; + else + return activeMainObjectIndex; +} + +const std::vector& DrawResourcesFiller::getDrawCalls() const +{ + if (currentReplayCache) + return currentReplayCache->drawCallsData; + else + return drawCalls; +} + +std::unique_ptr DrawResourcesFiller::createReplayCache() +{ + flushDrawObjects(); + std::unique_ptr ret = std::unique_ptr(new ReplayCache); + ret->resourcesCollection = resourcesCollection; + ret->msdfStagedCPUImages = msdfStagedCPUImages; + for (auto& stagedMSDF : ret->msdfStagedCPUImages) + { + stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. 
+ stagedMSDF.usedThisFrame = false; + } + ret->drawCallsData = drawCalls; + ret->activeMainObjectIndex = activeMainObjectIndex; + return ret; +} + +void DrawResourcesFiller::setReplayCache(ReplayCache* cache) +{ + currentReplayCache = cache; +} + +void DrawResourcesFiller::unsetReplayCache() +{ + currentReplayCache = nullptr; +} + +bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources) { copiedResourcesSize = 0ull; @@ -479,19 +546,19 @@ bool DrawResourcesFiller::finalizeBufferCopies(SIntendedSubmitInfo& intendedNext copiedResourcesSize += drawBuffer.getAlignedStorageSize(); }; - copyCPUFilledDrawBuffer(resourcesCollection.lineStyles); - copyCPUFilledDrawBuffer(resourcesCollection.dtmSettings); - copyCPUFilledDrawBuffer(resourcesCollection.customProjections); - copyCPUFilledDrawBuffer(resourcesCollection.customClipRects); - copyCPUFilledDrawBuffer(resourcesCollection.mainObjects); - copyCPUFilledDrawBuffer(resourcesCollection.drawObjects); - copyCPUFilledDrawBuffer(resourcesCollection.indexBuffer); - copyCPUFilledDrawBuffer(resourcesCollection.geometryInfo); + copyCPUFilledDrawBuffer(resources.lineStyles); + copyCPUFilledDrawBuffer(resources.dtmSettings); + copyCPUFilledDrawBuffer(resources.customProjections); + copyCPUFilledDrawBuffer(resources.customClipRects); + copyCPUFilledDrawBuffer(resources.mainObjects); + copyCPUFilledDrawBuffer(resources.drawObjects); + copyCPUFilledDrawBuffer(resources.indexBuffer); + copyCPUFilledDrawBuffer(resources.geometryInfo); return true; } -bool DrawResourcesFiller::finalizeMSDFImagesCopies(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages) { auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); @@ -531,9 +598,9 @@ bool DrawResourcesFiller::finalizeMSDFImagesCopies(SIntendedSubmitInfo& intended // Do the copies and advance the iterator. 
// this is the pattern we use for iterating when entries will get erased if processed successfully, but may get skipped for later. - for (uint32_t i = 0u; i < msdfStagedCPUImages.size(); ++i) + for (uint32_t i = 0u; i < stagedMSDFCPUImages.size(); ++i) { - auto& stagedMSDF = msdfStagedCPUImages[i]; + auto& stagedMSDF = stagedMSDFCPUImages[i]; if (stagedMSDF.image && i < msdfImage->getCreationParameters().arrayLayers) { for (uint32_t mip = 0; mip < stagedMSDF.image->getCreationParameters().mipLevels; mip++) @@ -572,7 +639,6 @@ bool DrawResourcesFiller::finalizeMSDFImagesCopies(SIntendedSubmitInfo& intended } else { - assert(false); stagedMSDF.uploadedToGPU = false; } } @@ -624,7 +690,6 @@ const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t& mainObjectIndex) { - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded mainObjectIndex = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); // it will be 0 because it's first mainObjectIndex after reset and invalidation @@ -780,7 +845,6 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten if (needToOverflowSubmit) { // failed to fit into remaining resources mem or exceeded max indexable mainobj - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! } @@ -803,7 +867,6 @@ uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& l if (outLineStyleIdx == InvalidStyleIdx) { // There wasn't enough resource memory remaining to fit a single LineStyle - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! 
@@ -821,7 +884,6 @@ uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInf if (outDTMSettingIdx == InvalidDTMSettingsIdx) { // There wasn't enough resource memory remaining to fit dtmsettings struct + 2 linestyles structs. - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! @@ -839,7 +901,6 @@ uint32_t DrawResourcesFiller::addCustomProjection_SubmitIfNeeded(const float64_t if (!enoughMem) { - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! } @@ -856,7 +917,6 @@ uint32_t DrawResourcesFiller::addCustomClipRect_SubmitIfNeeded(const WorldClipRe if (!enoughMem) { - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! } @@ -1159,7 +1219,6 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor { // Dealloc once submission is finished msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, nextSemaSignal); - finalizeAllCopiesToGPU(intendedNextSubmit); submitDraws(intendedNextSubmit); reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded } @@ -1183,7 +1242,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor if (inserted->alloc_idx != IndexAllocator::AddressAllocator::invalid_address) { - // We stage copy, finalizeMSDFImagesCopies will push it into GPU + // We stage copy, pushMSDFImagesUploads will push it into GPU msdfStagedCPUImages[inserted->alloc_idx].image = std::move(cpuImage); msdfStagedCPUImages[inserted->alloc_idx].uploadedToGPU = false; } diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index fb4f2c97a..a10379e1a 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -86,6 +86,7 @@ struct DrawResourcesFiller }; /// @brief struct 
to hold all resources + // TODO: rename to staged resources buffers or something like that struct ResourcesCollection { // auto-submission level 0 resources (settings that mainObj references) @@ -204,12 +205,12 @@ struct DrawResourcesFiller float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit); - - /// @brief call this function before submitting to ensure all resources are copied + + /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. - bool finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); - /// @brief resets resources buffers + /// @brief resets staging buffers and images void reset() { resetDrawObjects(); @@ -225,7 +226,7 @@ struct DrawResourcesFiller } /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders - const ResourcesCollection& getResourcesCollection() const { return resourcesCollection; } + const ResourcesCollection& getResourcesCollection() const; /// @brief buffer containing all non-texture type resources nbl::core::smart_refctd_ptr getResourcesGPUBuffer() const { return resourcesGPUBuffer; } @@ -260,7 +261,23 @@ struct DrawResourcesFiller } /// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj - uint32_t getActiveMainObjectIndex() const { return activeMainObjectIndex; } + uint32_t getActiveMainObjectIndex() const; + + struct MSDFStagedCPUImage + { + core::smart_refctd_ptr image; + bool uploadedToGPU : 1u; + // TODO: Use frame counter instead, generalize struct to all textures probably, DONT try to abuse scratchSema.nextSignal as frame 
tracker, because there can be "cached" draws where no submits happen. + bool usedThisFrame : 1u; + + bool isValid() const { return image.get() != nullptr; } + void evict() + { + image = nullptr; + uploadedToGPU = false; + usedThisFrame = false; + } + }; // NOTE: Most probably Going to get removed soon with a single draw call in GPU-driven rendering struct DrawCallData @@ -283,29 +300,58 @@ struct DrawResourcesFiller bool isDTMRendering; }; - const std::vector& getDrawCalls() const { return drawCalls; } - - // ! This is all the textures and buffers that were staged on CPU and eventually copied to GPU in a single submit - // ! This data is prepped and ready to be consumed by GPU with no further transformations applied on the data. - // ! You can back this up, and replay your scene without having to traverse your scene and do AddXXX, DrawXXX all over again. - struct DrawResourcesCache + const std::vector& getDrawCalls() const; + + /// @brief Stores all CPU-side resources that were staged and prepared for a single GPU submission. + /// + /// *** This cache includes anything used or referenced from DrawResourcesFiller in the Draw Submit: + /// - Buffer data (geometry, indices, etc.) + /// - MSDF CPU images + /// - Draw call metadata + /// - Active MainObject Index --> this is another state of the submit that we need to store + /// + /// The data is fully preprocessed and ready to be pushed to the GPU with no further transformation. + /// This enables efficient replays without traversing or re-generating scene content. + struct ReplayCache { - // TODO: Resources Colletion - // TODO: MSDFs Staging Cache - // TODO: Draw Calls Data - // TODO: Get total memory consumption + ResourcesCollection resourcesCollection; + std::vector msdfStagedCPUImages; + std::vector drawCallsData; + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + // TODO: non msdf general CPU Images + // TODO: Get total memory consumption for logging? 
}; - // TODO: Backup which gives DrawResourcesCache - // TODO: Restore which gets DrawResourcesCache + /// @brief Creates a snapshot of all currently staged CPU-side resourcesCollection for future replay or deferred submission. + /// + /// @warning This cache corresponds to a **single intended GPU submit**. + /// If your frame submission overflows into multiple submits due to staging memory limits or batching, + /// you are responsible for creating **multiple ReplayCache instances**, one per submit. + /// + /// @return A heap-allocated ReplayCache containing a copy of all staged CPU-side resourcesCollection and draw call data. + std::unique_ptr createReplayCache(); + + /// @brief Redirects all subsequent resource upload and getters to use an external ReplayCache. + /// + /// After calling this function, staging, resource getters, and upload mechanisms will pull data from the given ReplayCache + /// instead of the internal accumulation cache. + /// + /// User is responsible for management of cache and making sure it's alive in the ReplayCache scope + void setReplayCache(ReplayCache* cache); + + /// @brief Reverts internal logic to use the default internal staging and resource accumulation cache. + /// Must be called once per corresponding `pushReplayCacheUse()`. + void unsetReplayCache(); protected: SubmitFunc submitDraws; - - bool finalizeBufferCopies(SIntendedSubmitInfo& intendedNextSubmit); - bool finalizeMSDFImagesCopies(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. + bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); + + /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. 
+ bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages); const size_t calculateRemainingResourcesSize() const; @@ -513,6 +559,9 @@ struct DrawResourcesFiller // Flushes Current Draw Call and adds to drawCalls void flushDrawObjects(); + // Replay Cache override + ReplayCache* currentReplayCache = nullptr; + // DrawCalls Data uint64_t drawObjectsFlushedToDrawCalls = 0ull; std::vector drawCalls; // either dtms or objects @@ -544,22 +593,6 @@ struct DrawResourcesFiller std::deque activeClipRects; // stack of clips stored so we can resubmit them if geometry buffer got reset. std::deque activeClipRectIndices; // stack of clips gpu addresses in geometry buffer. to keep track of them in push/pops - struct MSDFStagedCPUImage - { - core::smart_refctd_ptr image; - bool uploadedToGPU : 1u; - // TODO: Use frame counter instead, generalize struct to all textures probably, DONT try to abuse scratchSema.nextSignal as frame tracker, because there can be "cached" draws where no submits happen. - bool usedThisFrame : 1u; - - bool isValid() const { return image.get() != nullptr; } - void evict() - { - image = nullptr; - uploadedToGPU = false; - usedThisFrame = false; - } - }; - GetGlyphMSDFTextureFunc getGlyphMSDF; GetHatchFillPatternMSDFTextureFunc getHatchFillPatternMSDF; @@ -568,7 +601,6 @@ struct DrawResourcesFiller smart_refctd_ptr msdfTextureArrayIndexAllocator; std::unique_ptr msdfLRUCache; // LRU Cache to evict Least Recently Used in case of overflow - // TODO: Maybe move this to Resources Collection? 
std::vector msdfStagedCPUImages = {}; // cached cpu imaged + their status, size equals to LRUCache size static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 9ab67ffe2..c7fe04603 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -45,6 +45,7 @@ static constexpr bool DebugModeWireframe = false; static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; +static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic enum class ExampleMode { @@ -77,7 +78,7 @@ constexpr std::array cameraExtents = 10.0 // CASE_BUG }; -constexpr ExampleMode mode = ExampleMode::CASE_2; +constexpr ExampleMode mode = ExampleMode::CASE_9; class Camera2D { @@ -240,7 +241,7 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources std::fill(m_framebuffers.begin(),m_framebuffers.end(),nullptr); } - // For creating extra per-image or swapchain resources you might need + // For creating extra per-image or swapchain resourcesCollection you might need virtual inline bool onCreateSwapchain_impl(const uint8_t qFam) { auto device = const_cast(m_renderpass->getOriginDevice()); @@ -286,10 +287,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu constexpr static uint32_t MaxSubmitsInFlight = 16u; public: - void allocateResources(uint32_t maxObjects) + void allocateResources() { drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue()); - + size_t bufferSize = 512u * 1024u * 1024u; // 512 MB drawResourcesFiller.allocateResourcesBuffer(m_device.get(), bufferSize); drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); @@ -626,7 +627,7 @@ class ComputerAidedDesign final : 
public examples::SimpleWindowedApplication, pu double dt = 0; double m_timeElapsed = 0.0; std::chrono::steady_clock::time_point lastTime; - uint32_t m_hatchDebugStep = 0u; + uint32_t m_hatchDebugStep = 10u; E_HEIGHT_SHADING_MODE m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; inline bool onAppInitialized(smart_refctd_ptr&& system) override @@ -657,7 +658,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if (!m_surface->init(getGraphicsQueue(),std::move(scResources),{})) return logFail("Could not initialize the Surface!"); - allocateResources(1024 * 1024u); + allocateResources(); const bitflag bindlessTextureFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT | @@ -1089,6 +1090,14 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } , m_logger.get()); + const bool isCachingDraw = CacheAndReplay && m_realFrameIx == 0u; + if (isCachingDraw) + { + SIntendedSubmitInfo invalidSubmit = {}; + addObjects(invalidSubmit); // if any overflows happen here, it will add to our replay cache and not submit anything + replayCaches.push_back(drawResourcesFiller.createReplayCache()); + finishedCachingDraw = true; + } if (!beginFrameRender()) return; @@ -1109,10 +1118,28 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu IQueue::SSubmitInfo::SSemaphoreInfo waitSems[2u] = { acquired, prevFrameRendered }; m_intendedNextSubmit.waitSemaphores = waitSems; - addObjects(m_intendedNextSubmit); - + if (CacheAndReplay) + { + // to size-1u because we only want to submit overflows here. 
+ for (uint32_t i = 0u; i < replayCaches.size() - 1u; ++i) + { + drawResourcesFiller.setReplayCache(replayCaches[i].get()); + submitDraws(m_intendedNextSubmit, true); + drawResourcesFiller.unsetReplayCache(); + } + if (!replayCaches.empty()) + drawResourcesFiller.setReplayCache(replayCaches.back().get()); + } + else + { + addObjects(m_intendedNextSubmit); + } + endFrameRender(m_intendedNextSubmit); + if (CacheAndReplay) + drawResourcesFiller.unsetReplayCache(); + #ifdef BENCHMARK_TILL_FIRST_FRAME if (!stopBenchamrkFlag) { @@ -1187,17 +1214,19 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) { - // TODO: Remove this check later - if (inBetweenSubmit) + const bool isCachingDraw = CacheAndReplay && m_realFrameIx == 0u && !finishedCachingDraw; + if (isCachingDraw) { - m_logger->log("Temporarily Disabled. Auto-Submission shouldn't happen (for Demo)", ILogger::ELL_ERROR); - assert(!inBetweenSubmit); + replayCaches.push_back(drawResourcesFiller.createReplayCache()); + return; // we don't record, submit or do anything, just caching the draw resources } + drawResourcesFiller.pushAllUploads(intendedSubmitInfo); + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; - const auto& resources = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); float64_t3x3 projectionToNDC; @@ -1206,13 +1235,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu Globals globalData = {}; uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); globalData.pointers = { - .lineStyles = baseAddress + resources.lineStyles.bufferOffset, - .dtmSettings = baseAddress + 
resources.dtmSettings.bufferOffset, - .customProjections = baseAddress + resources.customProjections.bufferOffset, - .customClipRects = baseAddress + resources.customClipRects.bufferOffset, - .mainObjects = baseAddress + resources.mainObjects.bufferOffset, - .drawObjects = baseAddress + resources.drawObjects.bufferOffset, - .geometryBuffer = baseAddress + resources.geometryInfo.bufferOffset, + .lineStyles = baseAddress + resourcesCollection.lineStyles.bufferOffset, + .dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset, + .customProjections = baseAddress + resourcesCollection.customProjections.bufferOffset, + .customClipRects = baseAddress + resourcesCollection.customClipRects.bufferOffset, + .mainObjects = baseAddress + resourcesCollection.mainObjects.bufferOffset, + .drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, }; globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; @@ -1253,7 +1282,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu uint32_t bufferBarriersCount = 0u; IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t bufferBarriers[MaxBufferBarriersCount]; - const auto& resources = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); if (m_globalsBuffer->getSize() > 0u) { @@ -1311,14 +1340,14 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu cb->bindGraphicsPipeline(graphicsPipeline.get()); - for (auto& drawCall : drawResourcesFiller.drawCalls) + for (auto& drawCall : drawResourcesFiller.getDrawCalls()) { if (drawCall.isDTMRendering) { - cb->bindIndexBuffer({ .offset = resources.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = 
drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); PushConstants pc = { - .triangleMeshVerticesBaseAddress = drawCall.dtm.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resources.geometryInfo.bufferOffset, + .triangleMeshVerticesBaseAddress = drawCall.dtm.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, .triangleMeshMainObjectIndex = drawCall.dtm.triangleMeshMainObjectIndex, .isDTMRendering = true }; @@ -1336,8 +1365,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu const uint64_t indexOffset = drawCall.drawObj.drawObjectStart * 6u; const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; - // assert(currentIndexCount == resources.indexBuffer.getCount()); - cb->bindIndexBuffer({ .offset = resources.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + // assert(currentIndexCount == resourcesCollection.indexBuffer.getCount()); + cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } } @@ -1350,7 +1379,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if constexpr (DebugModeWireframe) { - const uint32_t indexCount = resources.drawObjects.getCount() * 6u; + const uint32_t indexCount = resourcesCollection.drawObjects.getCount() * 6u; cb->bindGraphicsPipeline(debugGraphicsPipeline.get()); cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } @@ -1448,22 +1477,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu void 
addObjects(SIntendedSubmitInfo& intendedNextSubmit) { - // we record upload of our objects and if we failed to allocate we submit everything - if (!intendedNextSubmit.valid()) - { - // log("intendedNextSubmit is invalid.", nbl::system::ILogger::ELL_ERROR); - assert(false); - return; - } - - // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state - auto* cmdbuf = m_currentRecordingCommandBufferInfo->cmdbuf; - - assert(cmdbuf->getState() == video::IGPUCommandBuffer::STATE::RECORDING && cmdbuf->isResettable()); - assert(cmdbuf->getRecordingFlags().hasFlags(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)); - - auto* cmdpool = cmdbuf->getPool(); - drawResourcesFiller.setSubmitDrawsFunction( [&](SIntendedSubmitInfo& intendedNextSubmit) { @@ -2822,6 +2835,23 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { if (m_realFrameIx == 0u) { + // we record upload of our objects and if we failed to allocate we submit everything + if (!intendedNextSubmit.valid()) + { + // log("intendedNextSubmit is invalid.", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state + auto* cmdbuf = m_currentRecordingCommandBufferInfo->cmdbuf; + + assert(cmdbuf->getState() == video::IGPUCommandBuffer::STATE::RECORDING && cmdbuf->isResettable()); + assert(cmdbuf->getRecordingFlags().hasFlags(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)); + + auto* cmdpool = cmdbuf->getPool(); + + // Load image system::path m_loadCWD = ".."; std::string imagePath = "../../media/color_space_test/R8G8B8A8_1.png"; @@ -3416,8 +3446,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawFixedGeometryPolyline(polyline, style, transformation, TransformationType::TT_FIXED_SCREENSPACE_SIZE, intendedNextSubmit); } } - - 
drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); } double getScreenToWorldRatio(const float64_t3x3& viewProjectionMatrix, uint32_t2 windowSize) @@ -3433,6 +3461,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); clock_t::time_point start; + std::vector> replayCaches = {}; // vector because there can be overflow submits + bool finishedCachingDraw = false; + bool fragmentShaderInterlockEnabled = false; core::smart_refctd_ptr m_inputSystem; From 8677f1fc20c10a41603f75a445a9901132e54321 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 5 May 2025 16:48:49 +0400 Subject: [PATCH 071/129] sync with builtin-shaders work --- 62_CAD/shaders/globals.hlsl | 18 +++++++++++------- .../shaders/main_pipeline/fragment_shader.hlsl | 5 ++--- .../shaders/main_pipeline/resolve_alphas.hlsl | 3 +-- .../shaders/main_pipeline/vertex_shader.hlsl | 3 +-- 62_CAD/shaders/runtimeDeviceConfigCaps.hlsl | 6 ++++++ 5 files changed, 21 insertions(+), 14 deletions(-) create mode 100644 62_CAD/shaders/runtimeDeviceConfigCaps.hlsl diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 69346ee14..b565ff4ff 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -1,7 +1,14 @@ #ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ #define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -// #define NBL_FORCE_EMULATED_FLOAT_64 +#ifdef __HLSL_VERSION +#ifndef NBL_USE_SPIRV_BUILTINS +#include "runtimeDeviceConfigCaps.hlsl" // defines DeviceConfigCaps, uses JIT device caps +#endif +#endif + +// TODO[Erfan]: Turn off in the future, but keep enabled to test +#define NBL_FORCE_EMULATED_FLOAT_64 #include #include @@ -13,16 +20,14 @@ #ifdef __HLSL_VERSION #include -#include #endif using namespace nbl::hlsl; -// because we can't use jit/device_capabilities.hlsl in c++ code #ifdef __HLSL_VERSION -using pfloat64_t = portable_float64_t; -using pfloat64_t2 = portable_float64_t2; 
-using pfloat64_t3 = portable_float64_t3; +using pfloat64_t = portable_float64_t; +using pfloat64_t2 = portable_float64_t2; +using pfloat64_t3 = portable_float64_t3; #else using pfloat64_t = float64_t; using pfloat64_t2 = nbl::hlsl::vector; @@ -501,7 +506,6 @@ NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidMSDFImageIdx = nbl::hlsl::numeric_limits::max; // Hatches NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 326c4cf0d..6475faeff 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -7,7 +7,6 @@ #include #include #include -#include #include //#include @@ -159,7 +158,7 @@ float4 fragMain(PSInput input) : SV_TARGET localAlpha = dtmColor.a; gammaUncorrect(textureColor); // want to output to SRGB without gamma correction - return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); + return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); } else { @@ -416,6 +415,6 @@ float4 fragMain(PSInput input) : SV_TARGET // TODO[Przemek]: But make sure you're still calling this, correctly calculating alpha and texture color. // you can add 1 main object and push via DrawResourcesFiller like we already do for other objects (this go in the mainObjects StorageBuffer) and then set the currentMainObjectIdx to 0 here // having 1 main object temporarily means that all triangle meshes will be treated as a unified object in blending operations. 
- return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); + return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); } } diff --git a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl index 86257428f..69bab6bde 100644 --- a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl +++ b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl @@ -1,6 +1,5 @@ #include "common.hlsl" #include -#include template float32_t4 calculateFinalColor(const uint2 fragCoord); @@ -78,5 +77,5 @@ float32_t4 calculateFinalColor(const uint2 fragCoord) [shader("pixel")] float4 resolveAlphaMain(float4 position : SV_Position) : SV_TARGET { - return calculateFinalColor(position.xy); + return calculateFinalColor(position.xy); } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 478ad964f..73225e3c0 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -5,7 +5,6 @@ #include #include #include -#include // TODO[Lucas]: Move these functions to builtin hlsl functions (Even the shadertoy obb and aabb ones) float cross2D(float2 a, float2 b) @@ -518,7 +517,7 @@ PSInput main(uint vertexID : SV_VertexID) const float2 dilateRate = pixelsToIncreaseOnEachSide / screenSpaceAabbExtents; // float sufficient to hold the dilate rect? 
float2 dilateVec; float2 dilatedUV; - dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); + dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); // doing interpolation this way to ensure correct endpoints and 0 and 1, we can alternatively use branches to set current corner based on vertexIdx const pfloat64_t2 currentCorner = curveBox.aabbMin * (_static_cast(float2(1.0f, 1.0f)) - undilatedCornerF64) + diff --git a/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl b/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl new file mode 100644 index 000000000..96647c0e7 --- /dev/null +++ b/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl @@ -0,0 +1,6 @@ +#ifndef _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ +#define _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ + +#include +using DeviceConfigCaps = nbl::hlsl::jit::device_capabilities; +#endif // _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ From 8b5659980c4b00ca6c05356a43ec004743f0b0ab Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 12 May 2025 08:37:40 +0400 Subject: [PATCH 072/129] Small Fix to avoid msdf evictions of glyphs that were re-used --- 62_CAD/DrawResourcesFiller.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index a10379e1a..9be4a5b57 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -548,6 +548,13 @@ struct DrawResourcesFiller { textureIdx = tRef->alloc_idx; tRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit + if (textureIdx >= 0u && textureIdx <= msdfStagedCPUImages.size()) + msdfStagedCPUImages[textureIdx].usedThisFrame = true; + else + { + // shouldn't happen, TODO: Log + assert(false); + } } return textureIdx; } From 48b3125a32c722ef46a8fae79aa985670b07f12a Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 12 May 2025 08:38:14 +0400 Subject: [PATCH 073/129] 
small typo fix, oops --- 62_CAD/DrawResourcesFiller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 9be4a5b57..aec6e3b8e 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -548,7 +548,7 @@ struct DrawResourcesFiller { textureIdx = tRef->alloc_idx; tRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit - if (textureIdx >= 0u && textureIdx <= msdfStagedCPUImages.size()) + if (textureIdx >= 0u && textureIdx < msdfStagedCPUImages.size()) msdfStagedCPUImages[textureIdx].usedThisFrame = true; else { From daf68102835a819250e907079106d366fd4cd47f Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 13 May 2025 15:52:15 +0400 Subject: [PATCH 074/129] Support for StaticImages [VRAM Limitation Eviction Untested] + MSDF Image Eviction and Caching Fixes --- 62_CAD/DrawResourcesFiller.cpp | 483 +++++++++++++++--- 62_CAD/DrawResourcesFiller.h | 107 ++-- 62_CAD/main.cpp | 223 +++----- 62_CAD/shaders/globals.hlsl | 3 +- 62_CAD/shaders/main_pipeline/common.hlsl | 2 +- .../main_pipeline/fragment_shader.hlsl | 6 +- 6 files changed, 546 insertions(+), 278 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c83055f0e..d5babd393 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -6,7 +6,9 @@ DrawResourcesFiller::DrawResourcesFiller() DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue) : m_utilities(utils), m_copyQueue(copyQueue) -{} +{ + imagesUsageCache = std::unique_ptr(new ImagesUsageCache(ImagesBindingArraySize)); +} // function is called when buffer is filled and we should submit draws and clear the buffers and continue filling @@ -15,6 +17,13 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) submitDraws = func; } +// DrawResourcesFiller 
needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding +void DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding) +{ + imagesArrayBinding = binding; + suballocatedDescriptorSet = core::make_smart_refctd_ptr(std::move(descriptorSet)); +} + void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { // TODO: Make this function failable and report insufficient memory if less that getMinimumRequiredResourcesBufferSize, TODO: Have retry mechanism to allocate less mem @@ -239,14 +248,14 @@ void DrawResourcesFiller::drawHatch( if (color.a == 0.0f) // not visible return; - uint32_t textureIdx = InvalidTextureIdx; + uint32_t textureIdx = InvalidTextureIndex; if (fillPattern != HatchFillPattern::SOLID_FILL) { MSDFInputInfo msdfInfo = MSDFInputInfo(fillPattern); textureIdx = getMSDFIndexFromInputInfo(msdfInfo, intendedNextSubmit); - if (textureIdx == InvalidTextureIdx) + if (textureIdx == InvalidTextureIndex) textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), intendedNextSubmit); - _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIdx); // probably getHatchFillPatternMSDF returned nullptr + _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIndex); // probably getHatchFillPatternMSDF returned nullptr } LineStyleInfo lineStyle = {}; @@ -282,16 +291,16 @@ void DrawResourcesFiller::drawFontGlyph( float32_t2 minUV, SIntendedSubmitInfo& intendedNextSubmit) { - uint32_t textureIdx = InvalidTextureIdx; + uint32_t textureIdx = InvalidTextureIndex; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); textureIdx = getMSDFIndexFromInputInfo(msdfInput, intendedNextSubmit); - if (textureIdx == InvalidTextureIdx) + if (textureIdx == InvalidTextureIndex) textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), intendedNextSubmit); uint32_t mainObjIdx = 
acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); assert(mainObjIdx != InvalidMainObjectIdx); - if (textureIdx != InvalidTextureIdx) + if (textureIdx != InvalidTextureIndex) { GlyphInfo glyphInfo = GlyphInfo(topLeft, dirU, aspectRatio, textureIdx, minUV); if (!addFontGlyph_Internal(glyphInfo, mainObjIdx)) @@ -309,48 +318,167 @@ void DrawResourcesFiller::drawFontGlyph( } } -// TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo -// We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) - -void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { - auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool + /* + * The `suballocatedDescriptorSet` manages indices (slots) into a array of textures binding. + * This callback is invoked on eviction, and must: + * - Ensure safe deallocation of the slot. + * - Submit any pending draw calls if the evicted image was scheduled to be used in the upcoming submission. + */ + auto evictionCallback = [&](const ImageReference& evicted) + { + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // Because we will be writing to the descriptor set location which might be in use. 
+ ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.getFutureScratchSemaphore().semaphore, .value = evicted.lastUsedSemaphoreValue }; + + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedSemaphoreValue == intendedNextSubmit.getFutureScratchSemaphore().value); + + if (imageUsedForNextIntendedSubmit) { - const size_t remainingResourcesSize = calculateRemainingResourcesSize(); - - const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(ImageObjectInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); - // TODO[ERFAN]: later take into account: our maximum indexable vertex + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo); + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + } + else + { + // The image is not used in the current frame (intended next submit), so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. 
+ suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo); + } + }; - if (uploadableObjects <= 0u) - return false; - // Add Geometry - size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(ImageObjectInfo), alignof(ImageObjectInfo)); - void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; - memcpy(dst, &imageObjectInfo, sizeof(ImageObjectInfo)); - - // Push Indices, remove later when compute fills this - uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); - const uint32_t startObj = resourcesCollection.drawObjects.getCount(); - uint32_t i = 0u; - indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; - - // Add DrawObjs - DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); - DrawObject drawObj = {}; - drawObj.mainObjIndex = mainObjIdx; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function - drawObj.geometryAddress = geometryBufferOffset; - drawObjectsToBeFilled[0u] = drawObj; + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
+ ImageReference* inserted = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictionCallback); + inserted->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // in case there was an eviction + auto-submit, we need to update AGAIN - return true; - }; + // if inserted->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // in which case we don't queue anything for upload, and return the idx + if (inserted->index == InvalidTextureIndex) + { + // This is a new image (cache miss). Allocate a descriptor index for it. + inserted->index = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &inserted->index); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + + if (inserted->index != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + { + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + IGPUImage::SCreationParams imageParams = {}; + imageParams = cpuImage->getCreationParameters(); + imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) + }; + imageParams.format = physDev->promoteImageFormat(request,imageParams.tiling); + } + + // Attempt to create a GPU image and image view for this texture. + core::smart_refctd_ptr gpuImageView = nullptr; + + // Attempt to create a GPU image and corresponding image view for this texture. 
+ // If creation or memory allocation fails (likely due to VRAM exhaustion), + // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-inserted image remains. + while (imagesUsageCache->size() > 0u) + { + // Try creating the image and allocating memory for it: + auto gpuImg = device->createImage(std::move(imageParams)); + if (!gpuImg || !device->allocate(gpuImg->getMemoryReqs(), gpuImg.get()).isValid()) + { + // Failed creating or allocating the image, evict and retry. + if (imagesUsageCache->size() == 1u) + { + // Nothing else to evict; give up. + // We probably have evicted almost every other texture except the one we just allocated an index for + break; + } + + assert(imagesUsageCache->size() > 1u); + + const image_id evictionCandidate = imagesUsageCache->select_eviction_candidate(); + ImageReference* imageRef = imagesUsageCache->peek(evictionCandidate); + if (imageRef) + evictionCallback(*imageRef); + imagesUsageCache->erase(evictionCandidate); + suballocatedDescriptorSet->cull_frees(); // to make sure deallocation requests in eviction callback are waited for. + // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference + // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. + + continue; // Retry allocation after evicting. + } + + IGPUImageView::SCreationParams viewParams = { + .image = gpuImg, + .viewType = IGPUImageView::ET_2D, + .format = gpuImg->getCreationParameters().format + }; + gpuImg->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); + gpuImageView = device->createImageView(std::move(viewParams)); + if (!gpuImageView) + { + // TODO[LOG]: that's rare, image view creation failed. 
+ break; + } + + gpuImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); + break; + } + + if (gpuImageView) + { + StaticImagesCopy copyToStage = + { + .cpuImage = cpuImage, + .gpuImageView = gpuImageView, + .arrayIndex = inserted->index, + }; + staticImagesStagedCopies.push_back(copyToStage); + } + else + { + // All attempts to create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. + // TODO: Log a warning or error here – `addStaticImage2D` failed, likely due to low VRAM. + + if (inserted->index != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. + // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. + // No semaphore wait needed here, as the GPU never got to use this slot. + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &inserted->index, {}); + inserted->index = InvalidTextureIndex; + } + } + } + else + { + // TODO: log here, index allocation failed. 
+ inserted->index = InvalidTextureIndex; + } + } + + assert(inserted->index != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + + return inserted->index; +} + +// TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo +// We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) + +void DrawResourcesFiller::addImageObject(image_id imageID, float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) +{ beginMainObject(MainObjectType::IMAGE); uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); @@ -359,7 +487,7 @@ void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 info.topLeft = topLeftPos; info.dirU = float32_t2(size.x * cos(rotation), size.x * sin(rotation)); // info.aspectRatio = size.y / size.x; - info.textureID = 0u; + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory if (!addImageObject_Internal(info, mainObjIdx)) { // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects @@ -376,7 +504,7 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit if (!intendedNextSubmit.valid()) { // It is a caching submit without command buffer, just for the purpose of accumulation of staging resources - // In 
that case we don't push any uploads (i.e. we don't record any copy commmand in active command buffer, because there is no active command buffer) + // In that case we don't push any uploads (i.e. we don't record any stagedStaticImage commmand in active command buffer, because there is no active command buffer) return false; } @@ -386,12 +514,14 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit // This means we're in a replay cache scope, use the replay cache to push to GPU instead of internal accumulation success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfStagedCPUImages); + // TODO: pushStaticImagesUploads } else { flushDrawObjects(); success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, msdfStagedCPUImages); + success &= pushStaticImagesUploads(intendedNextSubmit); } return success; } @@ -483,10 +613,7 @@ std::unique_ptr DrawResourcesFiller::createRep ret->resourcesCollection = resourcesCollection; ret->msdfStagedCPUImages = msdfStagedCPUImages; for (auto& stagedMSDF : ret->msdfStagedCPUImages) - { stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. 
- stagedMSDF.usedThisFrame = false; - } ret->drawCallsData = drawCalls; ret->activeMainObjectIndex = activeMainObjectIndex; return ret; @@ -568,7 +695,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex auto msdfImage = msdfTextureArray->getCreationParameters().image; - // preparing msdfs for copy + // preparing msdfs for stagedStaticImage using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t; image_barrier_t beforeTransferImageBarrier[] = { @@ -682,6 +809,138 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } } +bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet(); + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); + + if (cmdBuffInfo) + { + bool success = true; + + if (staticImagesStagedCopies.size() > 0ull) + { + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + + // DescriptorSet Updates + std::vector descriptorInfos; + std::vector descriptorWrites; + descriptorInfos.resize(staticImagesStagedCopies.size()); + descriptorWrites.resize(staticImagesStagedCopies.size()); + for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + { + auto& stagedStaticImage = staticImagesStagedCopies[i]; + // Bind gpu image view to descriptor set + descriptorInfos[i].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfos[i].desc = stagedStaticImage.gpuImageView; + + // consider batching contiguous writes, if descriptor set updating was a hotspot + descriptorWrites[i].dstSet = descriptorSet, + descriptorWrites[i].binding = imagesArrayBinding; + descriptorWrites[i].arrayElement = stagedStaticImage.arrayIndex; + descriptorWrites[i].count = 1u; + descriptorWrites[i].info = &descriptorInfos[i]; 
+ } + + success &= device->updateDescriptorSets(descriptorWrites.size(), descriptorWrites.data(), 0u, nullptr); + + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.resize(staticImagesStagedCopies.size()); + + // Pipeline Barriers before stagedStaticImage + for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + { + auto& stagedStaticImage = staticImagesStagedCopies[i]; + const auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; + beforeCopyImageBarriers[i] = + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + }; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); + + for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + { + auto& stagedStaticImage = staticImagesStagedCopies[i]; + auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; + success &= m_utilities->updateImageViaStagingBuffer( + intendedNextSubmit, + stagedStaticImage.cpuImage->getBuffer()->getPointer(), stagedStaticImage.cpuImage->getCreationParameters().format, + gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + stagedStaticImage.cpuImage->getRegions()); + } + + std::vector afterCopyImageBarriers; + afterCopyImageBarriers.resize(staticImagesStagedCopies.size()); + + // Pipeline Barriers before stagedStaticImage 
+ for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + { + auto& stagedStaticImage = staticImagesStagedCopies[i]; + const auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; + afterCopyImageBarriers[i] = + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + } + + staticImagesStagedCopies.clear(); + if (!success) + { + // TODO: Log + _NBL_DEBUG_BREAK_IF(true); + } + return success; + + } + else + { + // TODO: Log + _NBL_DEBUG_BREAK_IF(true); + return false; + } +} + const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const { assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption()); @@ -1188,6 +1447,55 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint return true; } +bool DrawResourcesFiller::addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(ImageObjectInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex 
+ + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(ImageObjectInfo), alignof(ImageObjectInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &imageObjectInfo, sizeof(ImageObjectInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +uint32_t DrawResourcesFiller::getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t textureIdx = InvalidTextureIndex; + ImageReference* imageRef = imagesUsageCache->get(imageID); + if (imageRef) + { + textureIdx = imageRef->index; + imageRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit + } + return textureIdx; +} + void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) { getGlyphMSDF = func; @@ -1198,43 +1506,76 @@ 
void DrawResourcesFiller::setHatchFillMSDFTextureFunction(const GetHatchFillPatt getHatchFillPatternMSDF = func; } +uint32_t DrawResourcesFiller::getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t textureIdx = InvalidTextureIndex; + MSDFReference* tRef = msdfLRUCache->get(msdfInfo); + if (tRef) + { + textureIdx = tRef->alloc_idx; + tRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit + } + return textureIdx; +} + uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { if (!cpuImage) - return InvalidTextureIdx; // TODO: Log + return InvalidTextureIndex; // TODO: Log const auto cpuImageSize = cpuImage->getMipSize(0); const bool sizeMatch = cpuImageSize.x == getMSDFResolution().x && cpuImageSize.y == getMSDFResolution().y && cpuImageSize.z == 1u; if (!sizeMatch) - return InvalidTextureIdx; // TODO: Log - - // TextureReferences hold the semaValue related to the "scratch semaphore" in IntendedSubmitInfo - // Every single submit increases this value by 1 - // The reason for holding on to the lastUsedSema is deferred dealloc, which we call in the case of eviction, making sure we get rid of the entry inside the allocator only when the texture is done being used - const auto nextSemaSignal = intendedNextSubmit.getFutureScratchSemaphore(); - + return InvalidTextureIndex; // TODO: Log + + /* + * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. + * When all slots are occupied, the least recently used entry is evicted via `msdfLRUCache`. + * This callback is invoked on eviction, and must: + * - Ensure safe deallocation of the slot. + * - Submit any pending draw calls if the evicted MSDF was scheduled to be used in the upcoming submission. 
+ */ auto evictionCallback = [&](const MSDFReference& evicted) { - if (msdfStagedCPUImages[evicted.alloc_idx].usedThisFrame) + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // NOTE: This wait is currently *not* required for correctness because: + // - Both the image upload (stagedStaticImage) and usage occur within the same timeline (`intendedNextSubmit`). + // - timeline semaphores guarantee proper ordering: the next submit's stagedStaticImage will wait on the prior usage. + // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. + // + // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images + // outside the `intendedNextSubmit` timeline — for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.getFutureScratchSemaphore().semaphore, .value = evicted.lastUsedSemaphoreValue }; + + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedSemaphoreValue == intendedNextSubmit.getFutureScratchSemaphore().value); + + if (imageUsedForNextIntendedSubmit) { - // Dealloc once submission is finished - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, nextSemaSignal); + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. 
+ msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); submitDraws(intendedNextSubmit); reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded } else { - // We didn't use it this frame, so it's safe to dealloc now, withou needing to "overflow" submit - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx); + // The image is not used in the current frame (intended next submit), so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete (read note above). + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); } + + // Clear CPU-side metadata associated with the evicted slot. msdfStagedCPUImages[evicted.alloc_idx].evict(); }; // We pass nextSemaValue instead of constructing a new MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema - MSDFReference* inserted = msdfLRUCache->insert(msdfInput, nextSemaSignal.value, evictionCallback); + MSDFReference* inserted = msdfLRUCache->insert(msdfInput, intendedNextSubmit.getFutureScratchSemaphore().value, evictionCallback); - // if inserted->alloc_idx was not InvalidTextureIdx then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx - if (inserted->alloc_idx == InvalidTextureIdx) + inserted->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // in case there was an eviction + auto-submit, we need to update AGAIN + + // if inserted->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx + if (inserted->alloc_idx == InvalidTextureIndex) { // New insertion == cache miss happened and insertion was 
successfull inserted->alloc_idx = IndexAllocator::AddressAllocator::invalid_address; @@ -1242,22 +1583,18 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor if (inserted->alloc_idx != IndexAllocator::AddressAllocator::invalid_address) { - // We stage copy, pushMSDFImagesUploads will push it into GPU + // We stage stagedStaticImage, pushMSDFImagesUploads will push it into GPU msdfStagedCPUImages[inserted->alloc_idx].image = std::move(cpuImage); msdfStagedCPUImages[inserted->alloc_idx].uploadedToGPU = false; } else { // TODO: log here, assert will be called in a few lines - inserted->alloc_idx = InvalidTextureIdx; + inserted->alloc_idx = InvalidTextureIndex; } } - assert(inserted->alloc_idx != InvalidTextureIdx); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed - if (inserted->alloc_idx != InvalidTextureIdx) - { - msdfStagedCPUImages[inserted->alloc_idx].usedThisFrame = true; - } + assert(inserted->alloc_idx != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed return inserted->alloc_idx; } @@ -1273,4 +1610,4 @@ void DrawResourcesFiller::flushDrawObjects() drawCalls.push_back(drawCall); drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); } -} +} \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index aec6e3b8e..1a86c09e2 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -3,10 +3,11 @@ #include "CTriangleMesh.h" #include "Hatch.h" #include "IndexAllocator.h" +#include "ImagesUsageCache.h" #include #include #include - +// #include using namespace nbl; using namespace nbl::video; using namespace nbl::core; @@ -126,6 +127,9 @@ struct DrawResourcesFiller typedef std::function SubmitFunc; void 
setSubmitDrawsFunction(const SubmitFunc& func); + // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding + void setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding); + /// @brief Get minimum required size for resources buffer (containing objects and geometry info and their settings) static constexpr size_t getMinimumRequiredResourcesBufferSize() { @@ -200,11 +204,38 @@ struct DrawResourcesFiller float32_t2 minUV, SIntendedSubmitInfo& intendedNextSubmit); - void _test_addImageObject( - float64_t2 topLeftPos, - float32_t2 size, - float32_t rotation, - SIntendedSubmitInfo& intendedNextSubmit); + /** + * @brief Adds a static 2D image to the draw resource set for rendering. + * + * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. + * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. + * + * If the image is already cached and its slot is valid, it returns the slot index directly. + * Otherwise, it performs the following: + * - Allocates a new descriptor set slot. + * - Promotes the image format to be GPU-compatible. + * - Creates a GPU image and GPU image view. + * - Queues the image for uploading via staging in the next submit. + * - If memory is constrained, attempts to evict other images to free up space. + * + * @param imageID Unique identifier for the image resource. + * @param cpuImage The CPU-side image resource to (possibly) upload. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @return The index (slot) into the descriptor set array where the image is or will be bound. + * Returns `InvalidTextureIndex` only if all fallback and eviction attempts failed. 
+ * + * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. + * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers + * a flush of pending draws to preserve correctness. + * + * @note The function uses the `imagesUsageCache` LRU cache to track usage and validity of texture slots. + * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. + */ + uint32_t addStaticImage2D(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + + // This function must be called immediately after `addStaticImage` for the same imageID. + void addImageObject(image_id imageID, float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. @@ -219,7 +250,6 @@ struct DrawResourcesFiller resetCustomClipRects(); resetLineStyles(); resetDTMSettings(); - resetMSDFsUsageState(); drawObjectsFlushedToDrawCalls = 0ull; drawCalls.clear(); @@ -267,15 +297,12 @@ struct DrawResourcesFiller { core::smart_refctd_ptr image; bool uploadedToGPU : 1u; - // TODO: Use frame counter instead, generalize struct to all textures probably, DONT try to abuse scratchSema.nextSignal as frame tracker, because there can be "cached" draws where no submits happen. - bool usedThisFrame : 1u; bool isValid() const { return image.get() != nullptr; } void evict() { image = nullptr; uploadedToGPU = false; - usedThisFrame = false; } }; @@ -353,6 +380,10 @@ struct DrawResourcesFiller /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. 
bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages); + /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. + /// TODO: Handle for cache&replay mode later + bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit); + const size_t calculateRemainingResourcesSize() const; /// @brief Internal Function to call whenever we overflow when we can't fill all of mainObject's drawObjects @@ -416,6 +447,11 @@ struct DrawResourcesFiller /// Attempts to upload a single GlyphInfo considering resource limitations bool addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); + /// Attempts to upload a single image object considering resource limitations (not accounting for the resource image added using addStaticImage2D function) + bool addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; + + uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); + void resetMainObjects() { resourcesCollection.mainObjects.vector.clear(); @@ -460,12 +496,6 @@ struct DrawResourcesFiller activeDTMSettingsIndex = InvalidDTMSettingsIdx; } - void resetMSDFsUsageState() - { - for (auto& stagedMSDF : msdfStagedCPUImages) - stagedMSDF.usedThisFrame = false; - } - // MSDF Hashing and Caching Internal Functions enum class MSDFType : uint8_t { @@ -526,41 +556,22 @@ struct DrawResourcesFiller }; struct MSDFInputInfoHash { std::size_t operator()(const MSDFInputInfo& info) const { return info.lookupHash; } }; - + struct MSDFReference { uint32_t alloc_idx; uint64_t lastUsedSemaphoreValue; MSDFReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedSemaphoreValue(semaphoreVal) {} - MSDFReference(uint64_t semaphoreVal) : MSDFReference(InvalidTextureIdx, semaphoreVal) {} - MSDFReference() : MSDFReference(InvalidTextureIdx, ~0ull) {} + MSDFReference(uint64_t semaphoreVal) : 
MSDFReference(InvalidTextureIndex, semaphoreVal) {} + MSDFReference() : MSDFReference(InvalidTextureIndex, ~0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value to MSDFReference without changing `alloc_idx` inline MSDFReference& operator=(uint64_t semamphoreVal) { lastUsedSemaphoreValue = semamphoreVal; return *this; } }; - uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, SIntendedSubmitInfo& intendedNextSubmit) - { - uint32_t textureIdx = InvalidTextureIdx; - MSDFReference* tRef = msdfLRUCache->get(msdfInfo); - if (tRef) - { - textureIdx = tRef->alloc_idx; - tRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit - if (textureIdx >= 0u && textureIdx < msdfStagedCPUImages.size()) - msdfStagedCPUImages[textureIdx].usedThisFrame = true; - else - { - // shouldn't happen, TODO: Log - assert(false); - } - } - return textureIdx; - } + uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); - // ! mainObjIdx: make sure to pass your mainObjIdx to it if you want it to stay synced/updated if some overflow submit occured which would potentially erase what your mainObject points at. - // If you haven't created a mainObject yet, then pass InvalidMainObjectIdx uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); // Flushes Current Draw Call and adds to drawCalls @@ -593,6 +604,7 @@ struct DrawResourcesFiller TransformationType activeMainObjectTransformationType; uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order std::deque activeProjections; // stack of projections stored so we can resubmit them if geometry buffer got reset. 
std::deque activeProjectionIndices; // stack of projection gpu addresses in geometry buffer. to keep track of them in push/pops @@ -603,14 +615,29 @@ struct DrawResourcesFiller GetGlyphMSDFTextureFunc getGlyphMSDF; GetHatchFillPatternMSDFTextureFunc getHatchFillPatternMSDF; - using MSDFsLRUCache = core::LRUCache; + using MSDFsLRUCache = core::ResizableLRUCache; smart_refctd_ptr msdfTextureArray; // view to the resource holding all the msdfs in it's layers smart_refctd_ptr msdfTextureArrayIndexAllocator; std::unique_ptr msdfLRUCache; // LRU Cache to evict Least Recently Used in case of overflow std::vector msdfStagedCPUImages = {}; // cached cpu imaged + their status, size equals to LRUCache size static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; + + // Images: + std::unique_ptr imagesUsageCache; + smart_refctd_ptr suballocatedDescriptorSet; + uint32_t imagesArrayBinding = 0u; + + // static images (not streamable): + struct StaticImagesCopy + { + core::smart_refctd_ptr cpuImage; + core::smart_refctd_ptr gpuImageView; + uint32_t arrayIndex; + }; + std::vector staticImagesStagedCopies; + bool m_hasInitializedMSDFTextureArrays = false; }; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index c7fe04603..0aad1669e 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -78,7 +78,7 @@ constexpr std::array cameraExtents = 10.0 // CASE_BUG }; -constexpr ExampleMode mode = ExampleMode::CASE_9; +constexpr ExampleMode mode = ExampleMode::CASE_7; class Camera2D { @@ -666,6 +666,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; // Create DescriptorSetLayout, PipelineLayout and update DescriptorSets + const uint32_t imagesBinding = 3u; { video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = { { @@ -690,11 +691,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu .count = 1u, }, { - .binding = 3u, + 
.binding = imagesBinding, .type = asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = bindlessTextureFlags, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 128u, + .count = ImagesBindingArraySize, }, }; descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0); @@ -813,6 +814,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu pipelineLayout = m_device->createPipelineLayout({ &range,1 }, core::smart_refctd_ptr(descriptorSetLayout0), core::smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); } + drawResourcesFiller.setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr(descriptorSet0), imagesBinding); + smart_refctd_ptr mainPipelineFragmentShaders = {}; smart_refctd_ptr mainPipelineVertexShader = {}; std::array, 2u> geoTexturePipelineShaders = {}; @@ -1035,6 +1038,55 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos; m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0]; + // Load image + system::path m_loadCWD = ".."; + std::string imagePath = "../../media/color_space_test/R8G8B8A8_1.png"; + + constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); + const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); + auto bundle = m_assetMgr->getAsset(imagePath, loadParams); + auto contents = bundle.getContents(); + if (contents.empty()) + { + m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); + } + + smart_refctd_ptr cpuImgView; + const auto& asset = contents[0]; + switch (asset->getAssetType()) + { + case IAsset::ET_IMAGE: + { + auto image = smart_refctd_ptr_static_cast(asset); + const auto format = image->getCreationParameters().format; + + ICPUImageView::SCreationParams 
viewParams = { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(image), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + cpuImgView = ICPUImageView::create(std::move(viewParams)); + } break; + + case IAsset::ET_IMAGE_VIEW: + cpuImgView = smart_refctd_ptr_static_cast(asset); + break; + default: + m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + } + + const auto cpuImage = cpuImgView->getCreationParameters().image; + sampleImages.push_back(cpuImage); + return true; } @@ -1218,6 +1270,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if (isCachingDraw) { replayCaches.push_back(drawResourcesFiller.createReplayCache()); + intendedSubmitInfo.scratchSemaphore.value++; // fake advance needed for Texture and MSDF LRU caches and evictions to work return; // we don't record, submit or do anything, just caching the draw resources } @@ -2833,165 +2886,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else if (mode == ExampleMode::CASE_7) { - if (m_realFrameIx == 0u) + for (uint32_t i = 0; i < sampleImages.size(); ++i) { - // we record upload of our objects and if we failed to allocate we submit everything - if (!intendedNextSubmit.valid()) - { - // log("intendedNextSubmit is invalid.", nbl::system::ILogger::ELL_ERROR); - assert(false); - return; - } - - // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state - auto* cmdbuf = m_currentRecordingCommandBufferInfo->cmdbuf; - - assert(cmdbuf->getState() == video::IGPUCommandBuffer::STATE::RECORDING && cmdbuf->isResettable()); - 
assert(cmdbuf->getRecordingFlags().hasFlags(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)); - - auto* cmdpool = cmdbuf->getPool(); - - - // Load image - system::path m_loadCWD = ".."; - std::string imagePath = "../../media/color_space_test/R8G8B8A8_1.png"; - - constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); - const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(),m_loadCWD); - auto bundle = m_assetMgr->getAsset(imagePath,loadParams); - auto contents = bundle.getContents(); - if (contents.empty()) - { - m_logger->log("Failed to load image with path %s, skipping!",ILogger::ELL_ERROR,(m_loadCWD/imagePath).c_str()); - } - - smart_refctd_ptr cpuImgView; - const auto& asset = contents[0]; - switch (asset->getAssetType()) - { - case IAsset::ET_IMAGE: - { - auto image = smart_refctd_ptr_static_cast(asset); - const auto format = image->getCreationParameters().format; - - ICPUImageView::SCreationParams viewParams = { - .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, - .image = std::move(image), - .viewType = IImageView::E_TYPE::ET_2D, - .format = format, - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - } - }; - - cpuImgView = ICPUImageView::create(std::move(viewParams)); - } break; - - case IAsset::ET_IMAGE_VIEW: - cpuImgView = smart_refctd_ptr_static_cast(asset); - break; - default: - m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!",ILogger::ELL_ERROR); - } - - // create matching size gpu image - smart_refctd_ptr gpuImg; - const auto& origParams = cpuImgView->getCreationParameters(); - const auto origImage = origParams.image; - IGPUImage::SCreationParams imageParams = {}; - imageParams = 
origImage->getCreationParameters(); - imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; - // promote format because RGB8 and friends don't actually exist in HW - { - const IPhysicalDevice::SImageFormatPromotionRequest request = { - .originalFormat = imageParams.format, - .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) - }; - imageParams.format = m_physicalDevice->promoteImageFormat(request,imageParams.tiling); - } - gpuImg = m_device->createImage(std::move(imageParams)); - if (!gpuImg || !m_device->allocate(gpuImg->getMemoryReqs(),gpuImg.get()).isValid()) - m_logger->log("Failed to create or allocate gpu image!",ILogger::ELL_ERROR); - gpuImg->setObjectDebugName(imagePath.c_str()); - - IGPUImageView::SCreationParams viewParams = { - .image = gpuImg, - .viewType = IGPUImageView::ET_2D, - .format = gpuImg->getCreationParameters().format - }; - auto gpuImgView = m_device->createImageView(std::move(viewParams)); - - // Bind gpu image view to descriptor set - video::IGPUDescriptorSet::SDescriptorInfo dsInfo; - dsInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - dsInfo.desc = gpuImgView; - - IGPUDescriptorSet::SWriteDescriptorSet dsWrites[1u] = - { - { - .dstSet = descriptorSet0.get(), - .binding = 3u, - .arrayElement = 0u, - .count = 1u, - .info = &dsInfo, - } - }; - m_device->updateDescriptorSets(1u, dsWrites, 0u, nullptr); - - // Upload Loaded CPUImageData to GPU - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeCopyImageBarriers[] = - { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - } - // .ownershipOp. 
No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = origParams.subresourceRange, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - } - }; - - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - m_utils->updateImageViaStagingBuffer( - intendedNextSubmit, - origImage->getBuffer()->getPointer(), origImage->getCreationParameters().format, - gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - origImage->getRegions()); - - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t afterCopyImageBarriers[] = - { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = origParams.subresourceRange, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from + drawResourcesFiller.addStaticImage2D(imageID, sampleImages[i], intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { 0.0, 0.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); } - drawResourcesFiller._test_addImageObject({ 0.0, 0.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); - drawResourcesFiller._test_addImageObject({ 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); - LineStyleInfo lineStyle = { .color = float32_t4(1.0f, 0.1f, 0.1f, 0.9f), @@ -3517,6 
+3418,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::vector> m_shapeMSDFImages = {}; + std::vector> sampleImages; + static constexpr char FirstGeneratedCharacter = ' '; static constexpr char LastGeneratedCharacter = '~'; diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index b565ff4ff..3cf575b27 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -497,6 +497,7 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) } #endif +NBL_CONSTEXPR uint32_t ImagesBindingArraySize = 128; NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture NBL_CONSTEXPR uint32_t AlphaBits = 32u - MainObjectIdxBits; NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; @@ -505,7 +506,7 @@ NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; // Hatches NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 631e421b9..8b8cf5bad 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -242,7 +242,7 @@ struct PSInput [[vk::combinedImageSampler]][[vk::binding(1, 0)]] SamplerState msdfSampler : register(s4); [[vk::binding(2, 0)]] SamplerState textureSampler : register(s5); -[[vk::binding(3, 0)]] Texture2D textures[128] : register(t5); +[[vk::binding(3, 0)]] Texture2D textures[ImagesBindingArraySize] : register(t5); // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] globallycoherent RWTexture2D 
pseudoStencil : register(u0); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 6475faeff..d5949afcf 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -335,7 +335,7 @@ float4 fragMain(PSInput input) : SV_TARGET LineStyle style = loadLineStyle(mainObj.styleIdx); uint32_t textureId = asuint(style.screenSpaceLineWidth); - if (textureId != InvalidTextureIdx) + if (textureId != InvalidTextureIndex) { // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH @@ -349,7 +349,7 @@ float4 fragMain(PSInput input) : SV_TARGET const float2 uv = input.getFontGlyphUV(); const uint32_t textureId = input.getFontGlyphTextureId(); - if (textureId != InvalidTextureIdx) + if (textureId != InvalidTextureIndex) { float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); @@ -380,7 +380,7 @@ float4 fragMain(PSInput input) : SV_TARGET const float2 uv = input.getImageUV(); const uint32_t textureId = input.getImageTextureId(); - if (textureId != InvalidTextureIdx) + if (textureId != InvalidTextureIndex) { float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); textureColor = colorSample.rgb; From ce29c74f5258851d5cafa105758c45b07ed8f516 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 14 May 2025 14:05:49 +0400 Subject: [PATCH 075/129] [WIP] Images.h and ImagesMemorySubAllocator, TODO: postDestroyCleanup and deallocate image from the suballocator --- 62_CAD/DrawResourcesFiller.cpp | 153 ++++++++++++++++++++++++++------- 62_CAD/DrawResourcesFiller.h | 6 +- 62_CAD/Images.h | 153 
+++++++++++++++++++++++++++++++++ 3 files changed, 278 insertions(+), 34 deletions(-) create mode 100644 62_CAD/Images.h diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index d5babd393..5948535a9 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -27,6 +27,7 @@ void DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) { // TODO: Make this function failable and report insufficient memory if less that getMinimumRequiredResourcesBufferSize, TODO: Have retry mechanism to allocate less mem + // TODO: Allocate buffer memory and image memory with 1 allocation, so that failure and retries are more straightforward. size = core::alignUp(size, ResourcesMaxNaturalAlignment); size = core::max(size, getMinimumRequiredResourcesBufferSize()); // size = 368u; STRESS TEST @@ -39,6 +40,46 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); auto mem = logicalDevice->allocate(memReq, resourcesGPUBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + + // Allocate for Images + { + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); + uint32_t memoryTypeIdx = ~0u; + for (uint32_t i = 0u; i < memoryProperties.memoryTypeCount; ++i) + { + if (memoryProperties.memoryTypes[i].propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + { + memoryTypeIdx = i; + break; + } + } + + if (memoryTypeIdx == ~0u) + { + // TODO: Log, no device local memory found?! 
weird + assert(false); + } + + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = + { + .size = 512 * 1024 * 1024, // 512 MB + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, + .memoryTypeIndex = memoryTypeIdx, + .dedication = nullptr, + }; + imagesMemoryArena = logicalDevice->allocate(allocationInfo); + + if (imagesMemoryArena.isValid()) + { + imagesMemorySubAllocator = std::unique_ptr(new ImagesMemorySubAllocator(allocationInfo.size)); + } + else + { + // LOG: Allocation failure to allocate memory arena for images + assert(false); + } + } + } void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) @@ -350,7 +391,6 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo); } }; - // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. @@ -393,47 +433,94 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma while (imagesUsageCache->size() > 0u) { // Try creating the image and allocating memory for it: - auto gpuImg = device->createImage(std::move(imageParams)); - if (!gpuImg || !device->allocate(gpuImg->getMemoryReqs(), gpuImg.get()).isValid()) + auto gpuImage = device->createImage(std::move(imageParams)); + + if (gpuImage) { - // Failed creating or allocating the image, evict and retry. 
- if (imagesUsageCache->size() == 1u) + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuImageMemoryRequirements = gpuImage->getMemoryReqs(); + const bool imageMemoryRequirementsMatch = + (physDev->getDeviceLocalMemoryTypeBits() & gpuImageMemoryRequirements.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuImageMemoryRequirements.requiresDedicatedAllocation == false); // should not require dedicated allocation + + if (imageMemoryRequirementsMatch) { - // Nothing else to evict; give up. - // We probably have evicted almost every other texture except the one we just allocated an index for + uint64_t allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements); + const bool allocationFromImagesMemoryArenaSuccessfull = allocationOffset != ImagesMemorySubAllocator::InvalidAddress; + if (allocationFromImagesMemoryArenaSuccessfull) + { + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = gpuImage.get(), + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + allocationOffset } + }; + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + IGPUImageView::SCreationParams viewParams = { + .image = gpuImage, + .viewType = IGPUImageView::ET_2D, + .format = gpuImage->getCreationParameters().format + }; + gpuImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); + gpuImageView = device->createImageView(std::move(viewParams)); + if (gpuImageView) + { + gpuImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); + } + else + { + // irrecoverable error if simple image creation fails. + // TODO[LOG]: that's rare, image view creation failed. + } + + // succcessful with everything, just break and get out of this retry loop + break; + } + else + { + // irrecoverable error if simple bindImageMemory fails. 
+ // TODO: LOG + break; + } + } + else + { + // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry + } + } + else + { + // irrecoverable error if memory requirements of the image don't match our preallocated devicememory + // TODO: LOG break; } - - assert(imagesUsageCache->size() > 1u); - - const image_id evictionCandidate = imagesUsageCache->select_eviction_candidate(); - ImageReference* imageRef = imagesUsageCache->peek(evictionCandidate); - if (imageRef) - evictionCallback(*imageRef); - imagesUsageCache->erase(evictionCandidate); - suballocatedDescriptorSet->cull_frees(); // to make sure deallocation requests in eviction callback are waited for. - - // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference - // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. - - continue; // Retry allocation after evicting. } - - IGPUImageView::SCreationParams viewParams = { - .image = gpuImg, - .viewType = IGPUImageView::ET_2D, - .format = gpuImg->getCreationParameters().format - }; - gpuImg->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); - gpuImageView = device->createImageView(std::move(viewParams)); - if (!gpuImageView) + else { - // TODO[LOG]: that's rare, image view creation failed. + // irrecoverable error if simple image creation fails. + // TODO: LOG break; } - gpuImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); - break; + // Getting here means we failed creating or allocating the image, evict and retry. + if (imagesUsageCache->size() == 1u) + { + // Nothing else to evict; give up. 
+ // We probably have evicted almost every other texture except the one we just allocated an index for + break; + } + + assert(imagesUsageCache->size() > 1u); + + const image_id evictionCandidate = imagesUsageCache->select_eviction_candidate(); + ImageReference* imageRef = imagesUsageCache->peek(evictionCandidate); + if (imageRef) + evictionCallback(*imageRef); + imagesUsageCache->erase(evictionCandidate); + suballocatedDescriptorSet->cull_frees(); // to make sure deallocation requests in eviction callback are waited for. + + // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference + // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. } if (gpuImageView) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 1a86c09e2..594c0fba3 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -3,7 +3,7 @@ #include "CTriangleMesh.h" #include "Hatch.h" #include "IndexAllocator.h" -#include "ImagesUsageCache.h" +#include "Images.h" #include #include #include @@ -589,6 +589,10 @@ struct DrawResourcesFiller nbl::core::smart_refctd_ptr resourcesGPUBuffer; size_t copiedResourcesSize; + // GPUImages Memory Arena + AddressAllocator + IDeviceMemoryAllocator::SAllocation imagesMemoryArena; + std::unique_ptr imagesMemorySubAllocator; + // Members smart_refctd_ptr m_utilities; IQueue* m_copyQueue; diff --git a/62_CAD/Images.h b/62_CAD/Images.h new file mode 100644 index 000000000..8b4309669 --- /dev/null +++ b/62_CAD/Images.h @@ -0,0 +1,153 @@ +#pragma once +using namespace nbl; +using namespace nbl::video; +using namespace nbl::core; +using namespace nbl::asset; + +using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. 
+ +struct ImageReference +{ + static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; + uint32_t index = InvalidTextureIndex; // index in our array of textures binding + uint64_t lastUsedSemaphoreValue = 0ull; // last used semaphore value on this image + uint64_t memoryUsage = 0ull; // TODO: to be considered later + + ImageReference() + : index(InvalidTextureIndex) + , lastUsedSemaphoreValue(0ull) + , memoryUsage(0ull) + {} + + // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value + ImageReference(uint64_t semamphoreVal) + : index(InvalidTextureIndex) + , lastUsedSemaphoreValue(semamphoreVal) + , memoryUsage(0ull) + {} + + // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value without changing `index` + inline ImageReference& operator=(uint64_t semamphoreVal) { lastUsedSemaphoreValue = semamphoreVal; return *this; } +}; + +// A resource-aware image cache with an LRU eviction policy. +// This cache tracks image usage by ID and provides hooks for eviction logic, such as releasing descriptor slots and deallocating GPU memory. +// Currently, eviction is purely LRU-based. In the future, eviction decisions may incorporate additional factors: +// - memory usage per image. +// - lastUsedSemaphoreValue. +// This class does not own GPU resources directly, but helps coordinate their lifetimes in sync with GPU usage via eviction callbacks. +class ImagesUsageCache +{ +public: + ImagesUsageCache(size_t capacity) + : lruCache(ImagesLRUCache(capacity)) + {} + + // Attempts to insert a new image into the cache. + // If the cache is full, invokes the provided `evictCallback` to evict an image. + // Returns a pointer to the inserted or existing ImageReference. 
+ template EvictionCallback> + inline ImageReference* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) + { + return lruCache.insert(imageID, lastUsedSema, std::forward(evictCallback)); + } + + // Retrieves the image associated with `imageID`, updating its LRU position. + inline ImageReference* get(image_id imageID) + { + return lruCache.get(imageID); + } + + // Retrieves the ImageReference without updating LRU order. + inline ImageReference* peek(image_id imageID) + { + return lruCache.peek(imageID); + } + + inline size_t size() const { return lruCache.size(); } + + // Selects an eviction candidate based on LRU policy. + // In the future, this could factor in memory pressure or semaphore sync requirements. + inline image_id select_eviction_candidate() + { + const image_id* lru = lruCache.get_least_recently_used(); + if (lru) + return *lru; + else + { + // we shouldn't select eviction candidate if lruCache is empty + _NBL_DEBUG_BREAK_IF(true); + return 0ull; + } + } + + // Removes a specific image from the cache (manual eviction). + inline void erase(image_id imageID) + { + lruCache.erase(imageID); + } + +private: + using ImagesLRUCache = core::ResizableLRUCache; + ImagesLRUCache lruCache; // TODO: for now, work with simple lru cache, later on consider resource usage along with lastUsedSema value +}; + +/** + * @class ImagesMemorySubAllocator + * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. + * + * This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation + * for image resources within a contiguous block of GPU memory. + * + * @note This class only manages address offsets. The actual memory must be bound separately. 
+ */ +class ImagesMemorySubAllocator +{ +public: + using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; + using ReservedAllocator = nbl::core::allocator; + static constexpr uint64_t InvalidAddress = AddressAllocator::invalid_address; + + ImagesMemorySubAllocator() = default; + + ImagesMemorySubAllocator(const uint64_t memoryArenaSize) + { + constexpr uint64_t MaxAlignment = 4096u; // safe choice based on hardware reports + constexpr uint64_t MinAllocSize = 128 * 1024u; // 128KB, the larger this is the better + m_reservedAllocSize = AddressAllocator::reserved_size(MaxAlignment, memoryArenaSize, MinAllocSize); + m_reservedAllocator = std::unique_ptr(new ReservedAllocator()); + m_reservedAlloc = m_reservedAllocator->allocate(m_reservedAllocSize, _NBL_SIMD_ALIGNMENT); + m_addressAllocator = std::unique_ptr(new AddressAllocator( + m_reservedAlloc, 0u, 0u, MaxAlignment, memoryArenaSize, MinAllocSize + )); + + // m_addressAllocator->alloc_addr(bytes, alignment); + // m_addressAllocator->free_addr(addr, bytes) + } + + // return offset, will return InvalidAddress if failed + uint64_t allocate(const nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements& imageMemoryRequirements) + { + return m_addressAllocator->alloc_addr(imageMemoryRequirements.size, 1u << imageMemoryRequirements.alignmentLog2); + } + + void deallocate(uint64_t addr, uint64_t size) + { + m_addressAllocator->free_addr(addr, size); + } + + ~ImagesMemorySubAllocator() + { + if (m_reservedAlloc) + m_reservedAllocator->deallocate(reinterpret_cast(m_reservedAlloc), m_reservedAllocSize); + } + +private: + std::unique_ptr m_addressAllocator = nullptr; + + // Memory Allocation Required for the AddressAllocator + std::unique_ptr m_reservedAllocator = nullptr; + void* m_reservedAlloc = nullptr; + size_t m_reservedAllocSize = 0; + +}; \ No newline at end of file From bd9ee8e18c28e5fc444b732e59e8c68a8f2c97db Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 14 May 2025 17:01:08 +0400 Subject: 
[PATCH 076/129] Better Image Test + PostCleanup free + Fixing command buffer usage after potential auto-submit --- 62_CAD/DrawResourcesFiller.cpp | 30 ++++++++--- 62_CAD/DrawResourcesFiller.h | 24 +++++++-- 62_CAD/Images.h | 21 +++----- 62_CAD/main.cpp | 96 +++++++++++++++++++--------------- 4 files changed, 106 insertions(+), 65 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 5948535a9..411cb356c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -71,7 +71,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, if (imagesMemoryArena.isValid()) { - imagesMemorySubAllocator = std::unique_ptr(new ImagesMemorySubAllocator(allocationInfo.size)); + imagesMemorySubAllocator = core::make_smart_refctd_ptr(static_cast(allocationInfo.size)); } else { @@ -414,7 +414,6 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma IGPUImage::SCreationParams imageParams = {}; imageParams = cpuImage->getCreationParameters(); imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; - // promote format because RGB8 and friends don't actually exist in HW { const IPhysicalDevice::SImageFormatPromotionRequest request = { @@ -432,19 +431,28 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-inserted image remains. while (imagesUsageCache->size() > 0u) { + // Pre-create the cleanup object that will later be used to release the image's memory range. + // Ownership will be passed to the GPU image, but we retain a temporary raw pointer + // so we can configure the cleanup object *after* allocation succeeds. 
+ std::unique_ptr cleanupObject = std::make_unique(); + ImageCleanup* currentImageCleanup = cleanupObject.get(); + imageParams.postDestroyCleanup = std::move(cleanupObject); + // Try creating the image and allocating memory for it: auto gpuImage = device->createImage(std::move(imageParams)); if (gpuImage) { nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuImageMemoryRequirements = gpuImage->getMemoryReqs(); + uint32_t actualAlignment = 1u << gpuImageMemoryRequirements.alignmentLog2; const bool imageMemoryRequirementsMatch = (physDev->getDeviceLocalMemoryTypeBits() & gpuImageMemoryRequirements.memoryTypeBits) != 0 && // should have device local memory compatible - (gpuImageMemoryRequirements.requiresDedicatedAllocation == false); // should not require dedicated allocation + (gpuImageMemoryRequirements.requiresDedicatedAllocation == false) && // should not require dedicated allocation + ((ImagesMemorySubAllocator::MaxMemoryAlignment % actualAlignment) == 0u); // should be consistent with our suballocator's max alignment if (imageMemoryRequirementsMatch) { - uint64_t allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements); + uint64_t allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); const bool allocationFromImagesMemoryArenaSuccessfull = allocationOffset != ImagesMemorySubAllocator::InvalidAddress; if (allocationFromImagesMemoryArenaSuccessfull) { @@ -465,6 +473,10 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma gpuImageView = device->createImageView(std::move(viewParams)); if (gpuImageView) { + // SUCESS! 
+ currentImageCleanup->imagesMemorySuballocator = imagesMemorySubAllocator; + currentImageCleanup->addr = allocationOffset; + currentImageCleanup->size = gpuImageMemoryRequirements.size; gpuImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); } else @@ -778,7 +790,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex if (cmdBuffInfo) { - IGPUCommandBuffer* cmdBuff = cmdBuffInfo->cmdbuf; + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; auto msdfImage = msdfTextureArray->getCreationParameters().image; @@ -808,7 +820,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, } }; - cmdBuff->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeTransferImageBarrier }); + commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeTransferImageBarrier }); // Do the copies and advance the iterator. // this is the pattern we use for iterating when entries will get erased if processed successfully, but may get skipped for later. 
@@ -857,6 +869,8 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } } + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + // preparing msdfs for use image_barrier_t afterTransferImageBarrier[] = { @@ -882,7 +896,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, } }; - cmdBuff->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterTransferImageBarrier }); + commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterTransferImageBarrier }); if (!m_hasInitializedMSDFTextureArrays) m_hasInitializedMSDFTextureArrays = true; @@ -977,6 +991,8 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN stagedStaticImage.cpuImage->getRegions()); } + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + std::vector afterCopyImageBarriers; afterCopyImageBarriers.resize(staticImagesStagedCopies.size()); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 594c0fba3..ea60b33cd 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -591,7 +591,7 @@ struct DrawResourcesFiller // GPUImages Memory Arena + AddressAllocator IDeviceMemoryAllocator::SAllocation imagesMemoryArena; - std::unique_ptr imagesMemorySubAllocator; + smart_refctd_ptr imagesMemorySubAllocator; // Members smart_refctd_ptr m_utilities; @@ -626,6 +626,7 @@ struct DrawResourcesFiller std::vector msdfStagedCPUImages = {}; // cached cpu imaged + their status, size equals to LRUCache size static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; + bool m_hasInitializedMSDFTextureArrays = false; // Images: std::unique_ptr 
imagesUsageCache; @@ -641,7 +642,24 @@ struct DrawResourcesFiller }; std::vector staticImagesStagedCopies; - - bool m_hasInitializedMSDFTextureArrays = false; + struct ImageCleanup : nbl::video::ICleanup + { + ImageCleanup() + : imagesMemorySuballocator(nullptr) + , addr(ImagesMemorySubAllocator::InvalidAddress) + , size(0ull) + {} + + ~ImageCleanup() override + { + if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) + imagesMemorySuballocator->deallocate(addr, size); + } + + smart_refctd_ptr imagesMemorySuballocator; + uint64_t addr; + uint64_t size; + + }; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 8b4309669..b2772d217 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -101,34 +101,29 @@ class ImagesUsageCache * * @note This class only manages address offsets. The actual memory must be bound separately. */ -class ImagesMemorySubAllocator +class ImagesMemorySubAllocator : public core::IReferenceCounted { public: using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; using ReservedAllocator = nbl::core::allocator; static constexpr uint64_t InvalidAddress = AddressAllocator::invalid_address; + static constexpr uint64_t MaxMemoryAlignment = 4096u; // safe choice based on hardware reports + static constexpr uint64_t MinAllocSize = 128 * 1024u; // 128KB, the larger this is the better - ImagesMemorySubAllocator() = default; - - ImagesMemorySubAllocator(const uint64_t memoryArenaSize) + ImagesMemorySubAllocator(uint64_t memoryArenaSize) { - constexpr uint64_t MaxAlignment = 4096u; // safe choice based on hardware reports - constexpr uint64_t MinAllocSize = 128 * 1024u; // 128KB, the larger this is the better - m_reservedAllocSize = AddressAllocator::reserved_size(MaxAlignment, memoryArenaSize, MinAllocSize); + m_reservedAllocSize = AddressAllocator::reserved_size(MaxMemoryAlignment, memoryArenaSize, MinAllocSize); m_reservedAllocator = std::unique_ptr(new ReservedAllocator()); m_reservedAlloc = 
m_reservedAllocator->allocate(m_reservedAllocSize, _NBL_SIMD_ALIGNMENT); m_addressAllocator = std::unique_ptr(new AddressAllocator( - m_reservedAlloc, 0u, 0u, MaxAlignment, memoryArenaSize, MinAllocSize + m_reservedAlloc, 0u, 0u, MaxMemoryAlignment, memoryArenaSize, MinAllocSize )); - - // m_addressAllocator->alloc_addr(bytes, alignment); - // m_addressAllocator->free_addr(addr, bytes) } // return offset, will return InvalidAddress if failed - uint64_t allocate(const nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements& imageMemoryRequirements) + uint64_t allocate(uint64_t size, uint64_t alignment) { - return m_addressAllocator->alloc_addr(imageMemoryRequirements.size, 1u << imageMemoryRequirements.alignmentLog2); + return m_addressAllocator->alloc_addr(size, alignment); } void deallocate(uint64_t addr, uint64_t size) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 0aad1669e..8c6e5c33f 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1040,52 +1040,62 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // Load image system::path m_loadCWD = ".."; - std::string imagePath = "../../media/color_space_test/R8G8B8A8_1.png"; - - constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); - const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); - auto bundle = m_assetMgr->getAsset(imagePath, loadParams); - auto contents = bundle.getContents(); - if (contents.empty()) + constexpr uint32_t SampleImagesCount = 4u; + std::string imagePaths[SampleImagesCount] = { - m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); - } + "../../media/color_space_test/R8G8B8A8_1.png", + "../../media/color_space_test/R8G8B8A8_2.png", + "../../media/color_space_test/R8G8B8_1.png", + "../../media/color_space_test/R8G8B8_1.jpg", + }; - 
smart_refctd_ptr cpuImgView; - const auto& asset = contents[0]; - switch (asset->getAssetType()) + for (uint32_t i = 0; i < SampleImagesCount; ++i) { - case IAsset::ET_IMAGE: - { - auto image = smart_refctd_ptr_static_cast(asset); - const auto format = image->getCreationParameters().format; - - ICPUImageView::SCreationParams viewParams = { - .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, - .image = std::move(image), - .viewType = IImageView::E_TYPE::ET_2D, - .format = format, - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - } - }; + constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); + const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); + auto bundle = m_assetMgr->getAsset(imagePaths[i], loadParams); + auto contents = bundle.getContents(); + if (contents.empty()) + { + m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePaths[i]).c_str()); + } + + smart_refctd_ptr cpuImgView; + const auto& asset = contents[0]; + switch (asset->getAssetType()) + { + case IAsset::ET_IMAGE: + { + auto image = smart_refctd_ptr_static_cast(asset); + const auto format = image->getCreationParameters().format; + + ICPUImageView::SCreationParams viewParams = { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(image), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + cpuImgView = ICPUImageView::create(std::move(viewParams)); + } break; - 
cpuImgView = ICPUImageView::create(std::move(viewParams)); - } break; + case IAsset::ET_IMAGE_VIEW: + cpuImgView = smart_refctd_ptr_static_cast(asset); + break; + default: + m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + } - case IAsset::ET_IMAGE_VIEW: - cpuImgView = smart_refctd_ptr_static_cast(asset); - break; - default: - m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + const auto cpuImage = cpuImgView->getCreationParameters().image; + sampleImages.push_back(cpuImage); } - - const auto cpuImage = cpuImgView->getCreationParameters().image; - sampleImages.push_back(cpuImage); return true; } @@ -1276,6 +1286,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.pushAllUploads(intendedSubmitInfo); + m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; @@ -2890,8 +2902,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from drawResourcesFiller.addStaticImage2D(imageID, sampleImages[i], intendedNextSubmit); - drawResourcesFiller.addImageObject(imageID, { 0.0, 0.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); - drawResourcesFiller.addImageObject(imageID, { 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { 0.0 + i * 100.0, 0.0 }, { 100.0 , 100.0 }, 0.0, intendedNextSubmit); + // drawResourcesFiller.addImageObject(imageID, { 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, 
intendedNextSubmit); } LineStyleInfo lineStyle = { From fcff2ac68ea5dafcff8873bd379bd691c003bd59 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 14 May 2025 18:32:29 +0400 Subject: [PATCH 077/129] Small edits and refactor --- 62_CAD/DrawResourcesFiller.h | 20 -------------------- 62_CAD/Images.h | 20 ++++++++++++++++++++ 62_CAD/main.cpp | 9 ++++----- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index ea60b33cd..e87ede3b6 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -641,25 +641,5 @@ struct DrawResourcesFiller uint32_t arrayIndex; }; std::vector staticImagesStagedCopies; - - struct ImageCleanup : nbl::video::ICleanup - { - ImageCleanup() - : imagesMemorySuballocator(nullptr) - , addr(ImagesMemorySubAllocator::InvalidAddress) - , size(0ull) - {} - - ~ImageCleanup() override - { - if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) - imagesMemorySuballocator->deallocate(addr, size); - } - - smart_refctd_ptr imagesMemorySuballocator; - uint64_t addr; - uint64_t size; - - }; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index b2772d217..6e13d19cd 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -145,4 +145,24 @@ class ImagesMemorySubAllocator : public core::IReferenceCounted void* m_reservedAlloc = nullptr; size_t m_reservedAllocSize = 0; +}; + +struct ImageCleanup : nbl::video::ICleanup +{ + ImageCleanup() + : imagesMemorySuballocator(nullptr) + , addr(ImagesMemorySubAllocator::InvalidAddress) + , size(0ull) + {} + + ~ImageCleanup() override + { + if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) + imagesMemorySuballocator->deallocate(addr, size); + } + + smart_refctd_ptr imagesMemorySuballocator; + uint64_t addr; + uint64_t size; + }; \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 8c6e5c33f..016571fa8 100644 --- a/62_CAD/main.cpp +++ 
b/62_CAD/main.cpp @@ -1040,8 +1040,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // Load image system::path m_loadCWD = ".."; - constexpr uint32_t SampleImagesCount = 4u; - std::string imagePaths[SampleImagesCount] = + std::string imagePaths[] = { "../../media/color_space_test/R8G8B8A8_1.png", "../../media/color_space_test/R8G8B8A8_2.png", @@ -1049,15 +1048,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu "../../media/color_space_test/R8G8B8_1.jpg", }; - for (uint32_t i = 0; i < SampleImagesCount; ++i) + for (const auto& imagePath : imagePaths) { constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); - auto bundle = m_assetMgr->getAsset(imagePaths[i], loadParams); + auto bundle = m_assetMgr->getAsset(imagePath, loadParams); auto contents = bundle.getContents(); if (contents.empty()) { - m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePaths[i]).c_str()); + m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); } smart_refctd_ptr cpuImgView; From 093099f7c2b524507aa7f1dc0b1e9c089f4921ae Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 15 May 2025 12:21:23 +0400 Subject: [PATCH 078/129] Test and verify overflow submit works with images bound to non dedicated memory arena --- 62_CAD/DrawResourcesFiller.cpp | 48 ++++++---- 62_CAD/Images.h | 161 +++++++++++++++++---------------- 62_CAD/main.cpp | 1 + 3 files changed, 112 insertions(+), 98 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 411cb356c..91d3f22ce 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -62,7 +62,8 @@ void 
DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, IDeviceMemoryAllocator::SAllocateInfo allocationInfo = { - .size = 512 * 1024 * 1024, // 512 MB + // TODO: Get from user side. + .size = 70 * 1024 * 1024, // 70 MB .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, .memoryTypeIndex = memoryTypeIdx, .dedication = nullptr, @@ -372,6 +373,12 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // Prepare wait info to defer index deallocation until the GPU has finished using the resource. // Because we will be writing to the descriptor set location which might be in use. ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.getFutureScratchSemaphore().semaphore, .value = evicted.lastUsedSemaphoreValue }; + + // will later be used to release the image's memory range. + core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); + cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; + cleanupObject->addr = evicted.allocationOffset; + cleanupObject->size = evicted.allocationSize; const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedSemaphoreValue == intendedNextSubmit.getFutureScratchSemaphore().value); @@ -380,7 +387,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // The evicted image is scheduled for use in the upcoming submit. // To avoid rendering artifacts, we must flush the current draw queue now. // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. 
- suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo); + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); submitDraws(intendedNextSubmit); reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded } @@ -388,7 +395,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma { // The image is not used in the current frame (intended next submit), so we can deallocate without submitting any draws. // Still wait on the semaphore to ensure past GPU usage is complete. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo); + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); } }; @@ -431,13 +438,6 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-inserted image remains. while (imagesUsageCache->size() > 0u) { - // Pre-create the cleanup object that will later be used to release the image's memory range. - // Ownership will be passed to the GPU image, but we retain a temporary raw pointer - // so we can configure the cleanup object *after* allocation succeeds. 
- std::unique_ptr cleanupObject = std::make_unique(); - ImageCleanup* currentImageCleanup = cleanupObject.get(); - imageParams.postDestroyCleanup = std::move(cleanupObject); - // Try creating the image and allocating memory for it: auto gpuImage = device->createImage(std::move(imageParams)); @@ -452,14 +452,15 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma if (imageMemoryRequirementsMatch) { - uint64_t allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); - const bool allocationFromImagesMemoryArenaSuccessfull = allocationOffset != ImagesMemorySubAllocator::InvalidAddress; + inserted->allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); + const bool allocationFromImagesMemoryArenaSuccessfull = inserted->allocationOffset != ImagesMemorySubAllocator::InvalidAddress; if (allocationFromImagesMemoryArenaSuccessfull) { + inserted->allocationSize = gpuImageMemoryRequirements.size; nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = { .image = gpuImage.get(), - .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + allocationOffset } + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + inserted->allocationOffset } }; const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); if (boundToMemorySuccessfully) @@ -473,10 +474,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma gpuImageView = device->createImageView(std::move(viewParams)); if (gpuImageView) { - // SUCESS! - currentImageCleanup->imagesMemorySuballocator = imagesMemorySubAllocator; - currentImageCleanup->addr = allocationOffset; - currentImageCleanup->size = gpuImageMemoryRequirements.size; + // SUCCESS! 
gpuImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); } else @@ -543,13 +541,23 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma .gpuImageView = gpuImageView, .arrayIndex = inserted->index, }; + staticImagesStagedCopies.push_back(copyToStage); } else { // All attempts to create the GPU image and its corresponding view have failed. // Most likely cause: insufficient GPU memory or unsupported image parameters. - // TODO: Log a warning or error here – `addStaticImage2D` failed, likely due to low VRAM. + // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. + // assert(false); + + if (inserted->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + { + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(inserted->allocationOffset, inserted->allocationSize); + } if (inserted->index != InvalidTextureIndex) { @@ -568,7 +576,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma } } - assert(inserted->index != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + // assert(inserted->index != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed return inserted->index; } @@ -1647,7 +1655,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. 
// // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images - // outside the `intendedNextSubmit` timeline — for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. + // outside the `intendedNextSubmit` timeline � for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.getFutureScratchSemaphore().semaphore, .value = evicted.lastUsedSemaphoreValue }; const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedSemaphoreValue == intendedNextSubmit.getFutureScratchSemaphore().value); diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 6e13d19cd..7d9682e36 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -5,25 +5,105 @@ using namespace nbl::core; using namespace nbl::asset; using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. + +/** + * @class ImagesMemorySubAllocator + * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. + * + * This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation + * for image resources within a contiguous block of GPU memory. + * + * @note This class only manages address offsets. The actual memory must be bound separately. 
+ */ +class ImagesMemorySubAllocator : public core::IReferenceCounted +{ +public: + using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; + using ReservedAllocator = nbl::core::allocator; + static constexpr uint64_t InvalidAddress = AddressAllocator::invalid_address; + static constexpr uint64_t MaxMemoryAlignment = 4096u; // safe choice based on hardware reports + static constexpr uint64_t MinAllocSize = 128 * 1024u; // 128KB, the larger this is the better + + ImagesMemorySubAllocator(uint64_t memoryArenaSize) + { + m_reservedAllocSize = AddressAllocator::reserved_size(MaxMemoryAlignment, memoryArenaSize, MinAllocSize); + m_reservedAllocator = std::unique_ptr(new ReservedAllocator()); + m_reservedAlloc = m_reservedAllocator->allocate(m_reservedAllocSize, _NBL_SIMD_ALIGNMENT); + m_addressAllocator = std::unique_ptr(new AddressAllocator( + m_reservedAlloc, 0u, 0u, MaxMemoryAlignment, memoryArenaSize, MinAllocSize + )); + } + + // return offset, will return InvalidAddress if failed + uint64_t allocate(uint64_t size, uint64_t alignment) + { + return m_addressAllocator->alloc_addr(size, alignment); + } + + void deallocate(uint64_t addr, uint64_t size) + { + m_addressAllocator->free_addr(addr, size); + } + + ~ImagesMemorySubAllocator() + { + if (m_reservedAlloc) + m_reservedAllocator->deallocate(reinterpret_cast(m_reservedAlloc), m_reservedAllocSize); + } +private: + std::unique_ptr m_addressAllocator = nullptr; + + // Memory Allocation Required for the AddressAllocator + std::unique_ptr m_reservedAllocator = nullptr; + void* m_reservedAlloc = nullptr; + size_t m_reservedAllocSize = 0; + +}; + +// This will be dropped when the descriptor gets dropped from SuballocatedDescriptorSet. +// Destructor will then deallocate from GeneralPurposeAllocator, making the previously allocated range of the image available/free again. 
+struct ImageCleanup : public core::IReferenceCounted +{ + ImageCleanup() + : imagesMemorySuballocator(nullptr) + , addr(ImagesMemorySubAllocator::InvalidAddress) + , size(0ull) + {} + + ~ImageCleanup() override + { + if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) + imagesMemorySuballocator->deallocate(addr, size); + } + + smart_refctd_ptr imagesMemorySuballocator; + uint64_t addr; + uint64_t size; + +}; + struct ImageReference { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; uint32_t index = InvalidTextureIndex; // index in our array of textures binding uint64_t lastUsedSemaphoreValue = 0ull; // last used semaphore value on this image - uint64_t memoryUsage = 0ull; // TODO: to be considered later + uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; + uint64_t allocationSize = 0ull; ImageReference() : index(InvalidTextureIndex) , lastUsedSemaphoreValue(0ull) - , memoryUsage(0ull) + , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) + , allocationSize(0ull) {} // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value ImageReference(uint64_t semamphoreVal) : index(InvalidTextureIndex) , lastUsedSemaphoreValue(semamphoreVal) - , memoryUsage(0ull) + , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) + , allocationSize(0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value without changing `index` @@ -91,78 +171,3 @@ class ImagesUsageCache using ImagesLRUCache = core::ResizableLRUCache; ImagesLRUCache lruCache; // TODO: for now, work with simple lru cache, later on consider resource usage along with lastUsedSema value }; - -/** - * @class ImagesMemorySubAllocator - * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. 
- * - * This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation - * for image resources within a contiguous block of GPU memory. - * - * @note This class only manages address offsets. The actual memory must be bound separately. - */ -class ImagesMemorySubAllocator : public core::IReferenceCounted -{ -public: - using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; - using ReservedAllocator = nbl::core::allocator; - static constexpr uint64_t InvalidAddress = AddressAllocator::invalid_address; - static constexpr uint64_t MaxMemoryAlignment = 4096u; // safe choice based on hardware reports - static constexpr uint64_t MinAllocSize = 128 * 1024u; // 128KB, the larger this is the better - - ImagesMemorySubAllocator(uint64_t memoryArenaSize) - { - m_reservedAllocSize = AddressAllocator::reserved_size(MaxMemoryAlignment, memoryArenaSize, MinAllocSize); - m_reservedAllocator = std::unique_ptr(new ReservedAllocator()); - m_reservedAlloc = m_reservedAllocator->allocate(m_reservedAllocSize, _NBL_SIMD_ALIGNMENT); - m_addressAllocator = std::unique_ptr(new AddressAllocator( - m_reservedAlloc, 0u, 0u, MaxMemoryAlignment, memoryArenaSize, MinAllocSize - )); - } - - // return offset, will return InvalidAddress if failed - uint64_t allocate(uint64_t size, uint64_t alignment) - { - return m_addressAllocator->alloc_addr(size, alignment); - } - - void deallocate(uint64_t addr, uint64_t size) - { - m_addressAllocator->free_addr(addr, size); - } - - ~ImagesMemorySubAllocator() - { - if (m_reservedAlloc) - m_reservedAllocator->deallocate(reinterpret_cast(m_reservedAlloc), m_reservedAllocSize); - } - -private: - std::unique_ptr m_addressAllocator = nullptr; - - // Memory Allocation Required for the AddressAllocator - std::unique_ptr m_reservedAllocator = nullptr; - void* m_reservedAlloc = nullptr; - size_t m_reservedAllocSize = 0; - -}; - -struct ImageCleanup : nbl::video::ICleanup -{ - ImageCleanup() - : 
imagesMemorySuballocator(nullptr) - , addr(ImagesMemorySubAllocator::InvalidAddress) - , size(0ull) - {} - - ~ImageCleanup() override - { - if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) - imagesMemorySuballocator->deallocate(addr, size); - } - - smart_refctd_ptr imagesMemorySuballocator; - uint64_t addr; - uint64_t size; - -}; \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 016571fa8..b0e4c8d05 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1517,6 +1517,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { auto retval = device_base_t::getRequiredDeviceFeatures(); retval.fragmentShaderPixelInterlock = FragmentShaderPixelInterlock; + retval.nullDescriptor = true; return retval; } From fc20a2940689494cc79c3525fb16e578855199bf Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 15 May 2025 14:10:28 +0200 Subject: [PATCH 079/129] Grid DTM fundamentals --- 62_CAD/DrawResourcesFiller.cpp | 60 +++++++++++++- 62_CAD/DrawResourcesFiller.h | 9 ++ 62_CAD/main.cpp | 82 ++++++++++++++++++- 62_CAD/shaders/globals.hlsl | 21 +++-- 62_CAD/shaders/main_pipeline/common.hlsl | 13 ++- .../main_pipeline/fragment_shader.hlsl | 19 +++-- .../shaders/main_pipeline/vertex_shader.hlsl | 28 ++++++- 7 files changed, 214 insertions(+), 18 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c83055f0e..cb6e79352 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -311,6 +311,27 @@ void DrawResourcesFiller::drawFontGlyph( // TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo // We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu 
and update a descriptor slot (it will become more sophisticated but doesn't block you) +void DrawResourcesFiller::drawGridDTM( + const float64_t2& topLeft, + float64_t height, + float64_t width, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit) +{ + GridDTMInfo gridDTMInfo; + gridDTMInfo.topLeft = topLeft; + gridDTMInfo.height = height; + gridDTMInfo.width = width; + + beginMainObject(MainObjectType::GRID_DTM); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + assert(mainObjectIdx != InvalidMainObjectIdx); + + addGridDTM_Internal(gridDTMInfo, mainObjectIdx); + + endMainObject(); +} void DrawResourcesFiller::_test_addImageObject(float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) { @@ -827,7 +848,7 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten (activeMainObjectType == MainObjectType::POLYLINE) || (activeMainObjectType == MainObjectType::HATCH) || (activeMainObjectType == MainObjectType::TEXT); - const bool needsDTMSettings = (activeMainObjectType == MainObjectType::DTM); + const bool needsDTMSettings = (activeMainObjectType == MainObjectType::DTM || activeMainObjectType == MainObjectType::GRID_DTM); const bool needsCustomProjection = (!activeProjectionIndices.empty()); const bool needsCustomClipRect = (!activeClipRectIndices.empty()); @@ -1188,6 +1209,43 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint return true; } +bool DrawResourcesFiller::addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GridDTMInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + 
size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GridDTMInfo), alignof(GridDTMInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &gridDTMInfo, sizeof(GridDTMInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::GRID_DTM) | (0 << 16)); + //drawObj.geometryAddress = 0; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) { getGlyphMSDF = func; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index a10379e1a..15c4ad9a8 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -199,6 +199,12 @@ struct DrawResourcesFiller float32_t aspectRatio, float32_t2 minUV, SIntendedSubmitInfo& intendedNextSubmit); + + void drawGridDTM(const float64_t2& topLeft, + float64_t height, + float64_t width, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit); void _test_addImageObject( float64_t2 topLeftPos, @@ -416,6 +422,9 @@ struct DrawResourcesFiller /// Attempts to upload a single GlyphInfo considering 
resource limitations bool addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); + /// Attempts to upload a single GridDTMInfo considering resource limitations + bool addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx); + void resetMainObjects() { resourcesCollection.mainObjects.vector.clear(); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index c7fe04603..842712632 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -60,6 +60,7 @@ enum class ExampleMode CASE_8, // MSDF and Text CASE_9, // DTM CASE_BUG, // Bug Repro, after fix, rename to CASE_10 and comment should be: testing fixed geometry and emulated fp64 corner cases + CASE_11, // grid DTM CASE_COUNT }; @@ -75,10 +76,11 @@ constexpr std::array cameraExtents = 10.0, // CASE_7 600.0, // CASE_8 600.0, // CASE_9 - 10.0 // CASE_BUG + 10.0, // CASE_BUG + 600.0 // CASE_11 }; -constexpr ExampleMode mode = ExampleMode::CASE_9; +constexpr ExampleMode mode = ExampleMode::CASE_11; class Camera2D { @@ -3446,6 +3448,82 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawFixedGeometryPolyline(polyline, style, transformation, TransformationType::TT_FIXED_SCREENSPACE_SIZE, intendedNextSubmit); } } + else if (mode == ExampleMode::CASE_11) + { + DTMSettingsInfo dtmInfo{}; + //dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode |= E_DTM_MODE::CONTOUR; + + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; + dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + + dtmInfo.contourSettingsCount = 2u; + dtmInfo.contourSettings[0u].startHeight = 20; + dtmInfo.contourSettings[0u].endHeight = 90; + dtmInfo.contourSettings[0u].heightInterval = 10; + 
dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; + dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); + + dtmInfo.contourSettings[1u] = dtmInfo.contourSettings[0u]; + dtmInfo.contourSettings[1u].startHeight += 5.0f; + dtmInfo.contourSettings[1u].heightInterval = 13.0f; + dtmInfo.contourSettings[1u].lineStyleInfo.color = float32_t4(0.8f, 0.4f, 0.3f, 1.0f); + + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + switch (m_shadingModeExample) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + + break; + } + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + 
dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + } + + drawResourcesFiller.drawGridDTM({ 0.0f, 200.0f }, 400.0f, 800.0f, dtmInfo, intendedNextSubmit); + } } double getScreenToWorldRatio(const float64_t3x3& viewProjectionMatrix, uint32_t2 windowSize) diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index b565ff4ff..acbd55d40 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -120,6 +120,7 @@ enum class MainObjectType : uint32_t TEXT, IMAGE, DTM, + GRID_DTM }; enum class ObjectType : uint32_t @@ -130,7 +131,8 @@ enum class ObjectType : uint32_t POLYLINE_CONNECTOR = 3u, FONT_GLYPH = 4u, IMAGE = 5u, - TRIANGLE_MESH = 6u + TRIANGLE_MESH = 6u, + GRID_DTM = 7u }; enum class MajorAxis : uint32_t @@ -232,16 +234,23 @@ struct GlyphInfo // Goes into geometry buffer, needs to be 
aligned by 8 struct ImageObjectInfo { - pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) float32_t2 dirU; // 2 * 4 = 8 bytes (24) float32_t aspectRatio; // 4 bytes (28) uint32_t textureID; // 4 bytes (32) }; -/* -GRID DTM Info similar to `ImageObjectInfo` -other than textureID, there will be dtmSettingsIdx referencing a dtmSettings -*/ +// Goes into geometry buffer, needs to be aligned by 8 +struct GridDTMInfo +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + pfloat64_t height; // 8 bytes (24) + pfloat64_t width; // 8 bytes (32) + uint32_t textureID; // 4 bytes (36) + uint32_t dtmInfoID; // 4 bytes (40) + float gridCellWidth; // 4 bytes (44) + float _padding; // 4 bytes (48) +}; static uint32_t packR11G11B10_UNORM(float32_t3 color) { diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 631e421b9..e348ca0c2 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -229,7 +229,18 @@ struct PSInput void setScreenSpaceVertexAttribs(float3 pos) { vertexScreenSpacePos = pos; } #else // fragment shader float3 getScreenSpaceVertexAttribs(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; } -#endif +#endif + + /* GRID DTM */ + uint getHeightMapTextureID() { return data1.x; } + uint getDTMSettingsID() { return data1.y; } + float getGridDTMScreenSpaceCellWidth() { return data2.x; } + float2 getGridDTMScreenSpacePosition() { return interp_data5.zw; } + + void setHeightMapTextureID(uint heightMapTextureID) { data1.x = heightMapTextureID; } + void setDTMSettingsID(uint dtmSettingsID) { data1.y = dtmSettingsID; } + void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data2.x = screenSpaceGridWidth; } + void setGridDTMScreenSpacePosition(float2 screenSpacePosition) { interp_data5.zw = screenSpacePosition; } }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated diff --git 
a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 6475faeff..6a3953bf2 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -387,7 +387,7 @@ float4 fragMain(PSInput input) : SV_TARGET localAlpha = colorSample.a; } } - // objType GRID_DTM here + else if (objType == ObjectType::GRID_DTM) { // NOTE: create and read from a texture as a last step, you can generate the height values procedurally from a function while you're working on the sdf stuff. @@ -402,6 +402,16 @@ float4 fragMain(PSInput input) : SV_TARGET // TODO: we need to emulate dilation and do sdf of neighbouring cells as well. because contours, outlines and shading can bleed into other cells for AA. // [NOTE] Do dilation as last step, when everything else works fine + + textureColor = float4(1.0f, 1.0f, 1.0f, 1.0f); + float2 uv = input.getImageUV(); + float scalar = uv.x * uv.x * 0.25f + uv.y * uv.y * 0.25f; + textureColor *= scalar; + localAlpha = 1.0f; + + //return outputColor; + printf("uv = %f, %f", uv.x, uv.y); + } @@ -410,11 +420,8 @@ float4 fragMain(PSInput input) : SV_TARGET if (localAlpha <= 0) discard; - const bool colorFromTexture = objType == ObjectType::IMAGE; - - // TODO[Przemek]: But make sure you're still calling this, correctly calculating alpha and texture color. - // you can add 1 main object and push via DrawResourcesFiller like we already do for other objects (this go in the mainObjects StorageBuffer) and then set the currentMainObjectIdx to 0 here - // having 1 main object temporarily means that all triangle meshes will be treated as a unified object in blending operations. 
+ const bool colorFromTexture = objType == ObjectType::IMAGE || objType == ObjectType::GRID_DTM; + return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); } } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 73225e3c0..4b55c1e30 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -119,7 +119,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.data3 = float4(0, 0, 0, 0); outV.data4 = float4(0, 0, 0, 0); outV.interp_data5 = float2(0, 0); - + if (pc.isDTMRendering) { outV.setObjType(ObjectType::TRIANGLE_MESH); @@ -645,7 +645,31 @@ PSInput main(uint vertexID : SV_VertexID) outV.setImageUV(uv); outV.setImageTextureId(textureID); } - // TODO: Przemek objType GRID_DTM, Similar transformations to IMAGE + else if (objType == ObjectType::GRID_DTM) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + pfloat64_t height = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + pfloat64_t width = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(pfloat64_t), 8u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t), 8u); + uint32_t dtmSettingsID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t), 8u); + float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + 2 * sizeof(uint32_t), 8u); + + const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + pfloat64_t2 vtxPos = topLeft; + if (corner.x) + vtxPos.x = vtxPos.x + width; + if (corner.y) + vtxPos.y = 
vtxPos.y - height; + + float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); + outV.position = float4(ndcVtxPos, 0.0f, 1.0f); + + outV.setHeightMapTextureID(textureID); + outV.setDTMSettingsID(dtmSettingsID); + outV.setGridDTMScreenSpaceCellWidth(gridCellWidth); // TODO: is input world space? + outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos))); + outV.setImageUV(corner); + } // Make the cage fullscreen for testing: #if 0 From 0b010ea5503ab5f5460183d48d6aefb806c03baf Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 15 May 2025 15:17:09 +0200 Subject: [PATCH 080/129] Updated media submodule --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index 68dbe85b9..a98646358 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 68dbe85b9849c9b094760428a3639f5c8917d85e +Subproject commit a9864635879e5a616ac400eecd8b6451b498fbf1 From ab6dd2e3bbca153072f6cec0c84117429599e789 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 15 May 2025 15:48:07 +0200 Subject: [PATCH 081/129] Corrections --- 62_CAD/shaders/main_pipeline/common.hlsl | 2 +- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 23dc7b421..ccc30b1b8 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -82,7 +82,7 @@ struct PSInput [[vk::location(2)]] nointerpolation float4 data3 : COLOR3; [[vk::location(3)]] nointerpolation float4 data4 : COLOR4; // Data segments that need interpolation, mostly for hatches - [[vk::location(5)]] float2 interp_data5 : COLOR5; + [[vk::location(5)]] float4 interp_data5 : COLOR5; #ifdef FRAGMENT_SHADER_INPUT [[vk::location(6)]] 
[[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] float3 vertexScreenSpacePos[3] : COLOR6; #else diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 4b55c1e30..e92a8d33b 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -118,7 +118,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.data2 = float4(0, 0, 0, 0); outV.data3 = float4(0, 0, 0, 0); outV.data4 = float4(0, 0, 0, 0); - outV.interp_data5 = float2(0, 0); + outV.interp_data5 = float4(0, 0, 0, 0); if (pc.isDTMRendering) { @@ -667,7 +667,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.setHeightMapTextureID(textureID); outV.setDTMSettingsID(dtmSettingsID); outV.setGridDTMScreenSpaceCellWidth(gridCellWidth); // TODO: is input world space? - outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos))); + outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); outV.setImageUV(corner); } From 34a61a337035d8f2b053f1fed002b5f2c85ffde5 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 16 May 2025 08:58:48 +0400 Subject: [PATCH 082/129] Fix overflow submits in a beautiful way --- 62_CAD/DrawResourcesFiller.cpp | 88 +++++++++++++++++++++------------- 62_CAD/DrawResourcesFiller.h | 20 +++++--- 62_CAD/Images.h | 13 ++--- 62_CAD/main.cpp | 16 ++++--- 4 files changed, 87 insertions(+), 50 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 105374493..c7a074d2f 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -370,31 +370,35 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma */ auto evictionCallback = [&](const ImageReference& evicted) { - // Prepare wait 
info to defer index deallocation until the GPU has finished using the resource. - // Because we will be writing to the descriptor set location which might be in use. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.getFutureScratchSemaphore().semaphore, .value = evicted.lastUsedSemaphoreValue }; - - // will later be used to release the image's memory range. + // Later used to release the image's memory range. core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; cleanupObject->addr = evicted.allocationOffset; cleanupObject->size = evicted.allocationSize; + - const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedSemaphoreValue == intendedNextSubmit.getFutureScratchSemaphore().value); - + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); + + // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. if (imageUsedForNextIntendedSubmit) { // The evicted image is scheduled for use in the upcoming submit. // To avoid rendering artifacts, we must flush the current draw queue now. // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); submitDraws(intendedNextSubmit); reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. 
+ ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); } else { - // The image is not used in the current frame (intended next submit), so we can deallocate without submitting any draws. + // The image is not used in the current frame, so we can deallocate without submitting any draws. // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); } }; @@ -402,7 +406,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. 
ImageReference* inserted = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictionCallback); - inserted->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // in case there was an eviction + auto-submit, we need to update AGAIN + inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN // if inserted->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx @@ -481,6 +485,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma { // irrecoverable error if simple image creation fails. // TODO[LOG]: that's rare, image view creation failed. + _NBL_DEBUG_BREAK_IF(true); } // succcessful with everything, just break and get out of this retry loop @@ -490,11 +495,13 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma { // irrecoverable error if simple bindImageMemory fails. // TODO: LOG + _NBL_DEBUG_BREAK_IF(true); break; } } else { + // printf(std::format("Allocation Failed, Trying again, ImageID={} Size={} \n", imageID, gpuImageMemoryRequirements.size).c_str()); // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry } } @@ -502,6 +509,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma { // irrecoverable error if memory requirements of the image don't match our preallocated devicememory // TODO: LOG + _NBL_DEBUG_BREAK_IF(true); break; } } @@ -509,6 +517,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma { // irrecoverable error if simple image creation fails. 
// TODO: LOG + _NBL_DEBUG_BREAK_IF(true); break; } @@ -517,6 +526,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma { // Nothing else to evict; give up. // We probably have evicted almost every other texture except the one we just allocated an index for + _NBL_DEBUG_BREAK_IF(true); break; } @@ -527,7 +537,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma if (imageRef) evictionCallback(*imageRef); imagesUsageCache->erase(evictionCandidate); - suballocatedDescriptorSet->cull_frees(); // to make sure deallocation requests in eviction callback are waited for. + while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. @@ -535,13 +545,15 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma if (gpuImageView) { + inserted->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + StaticImagesCopy copyToStage = { .cpuImage = cpuImage, .gpuImageView = gpuImageView, .arrayIndex = inserted->index, }; - + // printf(std::format("Everything success, ImageID={} ArrayIndex={} \n", imageID, inserted->index).c_str()); staticImagesStagedCopies.push_back(copyToStage); } else @@ -549,8 +561,8 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // All attempts to create the GPU image and its corresponding view have failed. // Most likely cause: insufficient GPU memory or unsupported image parameters. // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. 
- // assert(false); - + _NBL_DEBUG_BREAK_IF(true); + if (inserted->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) { // We previously successfully create and allocated memory for the Image @@ -576,7 +588,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma } } - // assert(inserted->index != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + assert(inserted->index != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed return inserted->index; } @@ -967,8 +979,8 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN descriptorInfos[i].desc = stagedStaticImage.gpuImageView; // consider batching contiguous writes, if descriptor set updating was a hotspot - descriptorWrites[i].dstSet = descriptorSet, - descriptorWrites[i].binding = imagesArrayBinding; + descriptorWrites[i].dstSet = descriptorSet; + descriptorWrites[i].binding = imagesArrayBinding; descriptorWrites[i].arrayElement = stagedStaticImage.arrayIndex; descriptorWrites[i].count = 1u; descriptorWrites[i].info = &descriptorInfos[i]; @@ -1660,7 +1672,7 @@ uint32_t DrawResourcesFiller::getImageIndexFromID(image_id imageID, const SInten if (imageRef) { textureIdx = imageRef->index; - imageRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit + imageRef->lastUsedFrameIndex = currentFrameIndex; // update this because the texture will get used on the next frane } return textureIdx; } @@ -1675,6 +1687,13 @@ void DrawResourcesFiller::setHatchFillMSDFTextureFunction(const GetHatchFillPatt getHatchFillPatternMSDF = func; } +void DrawResourcesFiller::markFrameUsageComplete(uint64_t 
drawSubmitWaitValue) +{ + currentFrameIndex++; + // TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index + // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphoroe value will signal the completion of the (to be evicted) resource's usage +} + uint32_t DrawResourcesFiller::getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit) { uint32_t textureIdx = InvalidTextureIndex; @@ -1682,7 +1701,7 @@ uint32_t DrawResourcesFiller::getMSDFIndexFromInputInfo(const MSDFInputInfo& msd if (tRef) { textureIdx = tRef->alloc_idx; - tRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit + tRef->lastUsedFrameIndex = currentFrameIndex; // update this because the texture will get used on the next frame } return textureIdx; } @@ -1706,31 +1725,36 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor */ auto evictionCallback = [&](const MSDFReference& evicted) { - // Prepare wait info to defer index deallocation until the GPU has finished using the resource. - // NOTE: This wait is currently *not* required for correctness because: - // - Both the image upload (stagedStaticImage) and usage occur within the same timeline (`intendedNextSubmit`). - // - timeline semaphores guarantee proper ordering: the next submit's stagedStaticImage will wait on the prior usage. + // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. + // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: + // - Both the image upload (msdfStagedCPUImages) and usage occur within the same timeline (`intendedNextSubmit`). 
+ // - timeline semaphores guarantee proper ordering: the next submit's msdfStagedCPUImages will wait on the prior usage. // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. // // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images - // outside the `intendedNextSubmit` timeline � for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.getFutureScratchSemaphore().semaphore, .value = evicted.lastUsedSemaphoreValue }; + // outside the `intendedNextSubmit` timeline for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. - const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedSemaphoreValue == intendedNextSubmit.getFutureScratchSemaphore().value); + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); if (imageUsedForNextIntendedSubmit) { // The evicted image is scheduled for use in the upcoming submit. // To avoid rendering artifacts, we must flush the current draw queue now. // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); submitDraws(intendedNextSubmit); reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. 
+ ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); } else { - // The image is not used in the current frame (intended next submit), so we can deallocate without submitting any draws. - // Still wait on the semaphore to ensure past GPU usage is complete (read note above). + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit (scratchSemaphore.value). + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); } @@ -1739,9 +1763,9 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor }; // We pass nextSemaValue instead of constructing a new MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema - MSDFReference* inserted = msdfLRUCache->insert(msdfInput, intendedNextSubmit.getFutureScratchSemaphore().value, evictionCallback); + MSDFReference* inserted = msdfLRUCache->insert(msdfInput, currentFrameIndex, evictionCallback); - inserted->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // in case there was an eviction + auto-submit, we need to update AGAIN + inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN // if inserted->alloc_idx was not InvalidTextureIndex then it means 
we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx if (inserted->alloc_idx == InvalidTextureIndex) @@ -1752,7 +1776,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor if (inserted->alloc_idx != IndexAllocator::AddressAllocator::invalid_address) { - // We stage stagedStaticImage, pushMSDFImagesUploads will push it into GPU + // We stage msdfStagedCPUImages, pushMSDFImagesUploads will push it into GPU msdfStagedCPUImages[inserted->alloc_idx].image = std::move(cpuImage); msdfStagedCPUImages[inserted->alloc_idx].uploadedToGPU = false; } diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 801dc41c2..a9b5da172 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -149,16 +149,21 @@ struct DrawResourcesFiller void setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func); void setHatchFillMSDFTextureFunction(const GetHatchFillPatternMSDFTextureFunc& func); + // Must be called at the end of each frame. + // right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources. + // Registers the semaphore/value that will signal completion of this frame’s draw, + // This allows future frames to safely deallocate or evict resources used in the current frame by waiting on this signal before reuse or destruction. + // `drawSubmitWaitValue` should reference the wait value of the draw submission finishing this frame using the `intendedNextSubmit`; + void markFrameUsageComplete(uint64_t drawSubmitWaitValue); + // TODO[Przemek]: try to draft up a `CTriangleMesh` Class in it's own header (like CPolyline), simplest form is basically two cpu buffers (1 array of uint index buffer, 1 array of float64_t3 vertexBuffer) // TODO[Przemek]: Then have a `drawMesh` function here similar to drawXXX's below, this will fit both vertex and index buffer in the `geometryBuffer`. 
// take a `SIntendedSubmitInfo` like others, but don't use it as I don't want you to handle anything regarding autoSubmit // somehow retrieve or calculate the geometry buffer offsets of your vertex and index buffer to be used outside for binding purposes - //! this function fills buffers required for drawing a polyline and submits a draw through provided callback when there is not enough memory. void drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit); - //! Draws a fixed-geometry polyline using a custom transformation. //! TODO: Change `polyline` input to an ID referencing a possibly cached instance in our buffers, allowing reuse and avoiding redundant uploads. void drawFixedGeometryPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); @@ -568,14 +573,14 @@ struct DrawResourcesFiller struct MSDFReference { uint32_t alloc_idx; - uint64_t lastUsedSemaphoreValue; + uint64_t lastUsedFrameIndex; - MSDFReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedSemaphoreValue(semaphoreVal) {} - MSDFReference(uint64_t semaphoreVal) : MSDFReference(InvalidTextureIndex, semaphoreVal) {} + MSDFReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedFrameIndex(semaphoreVal) {} + MSDFReference(uint64_t currentFrameIndex) : MSDFReference(InvalidTextureIndex, currentFrameIndex) {} MSDFReference() : MSDFReference(InvalidTextureIndex, ~0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value to MSDFReference without changing `alloc_idx` - inline MSDFReference& operator=(uint64_t semamphoreVal) { lastUsedSemaphoreValue = semamphoreVal; return *this; } + inline MSDFReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; uint32_t 
getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); @@ -585,6 +590,9 @@ struct DrawResourcesFiller // Flushes Current Draw Call and adds to drawCalls void flushDrawObjects(); + // FrameIndex used as a criteria for resource/image eviction in case of limitations + uint32_t currentFrameIndex = 0u; + // Replay Cache override ReplayCache* currentReplayCache = nullptr; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 7d9682e36..7c9609161 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -73,6 +73,7 @@ struct ImageCleanup : public core::IReferenceCounted ~ImageCleanup() override { + // printf(std::format("Actual Eviction size={}, offset={} \n", size, addr).c_str()); if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) imagesMemorySuballocator->deallocate(addr, size); } @@ -87,34 +88,34 @@ struct ImageReference { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; uint32_t index = InvalidTextureIndex; // index in our array of textures binding - uint64_t lastUsedSemaphoreValue = 0ull; // last used semaphore value on this image + uint64_t lastUsedFrameIndex = 0ull; // last used semaphore value on this image uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; uint64_t allocationSize = 0ull; ImageReference() : index(InvalidTextureIndex) - , lastUsedSemaphoreValue(0ull) + , lastUsedFrameIndex(0ull) , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) , allocationSize(0ull) {} // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value - ImageReference(uint64_t semamphoreVal) + ImageReference(uint64_t currentFrameIndex) : index(InvalidTextureIndex) - , lastUsedSemaphoreValue(semamphoreVal) + , lastUsedFrameIndex(currentFrameIndex) , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) , allocationSize(0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to 
assign semaphore value without changing `index` - inline ImageReference& operator=(uint64_t semamphoreVal) { lastUsedSemaphoreValue = semamphoreVal; return *this; } + inline ImageReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; // A resource-aware image cache with an LRU eviction policy. // This cache tracks image usage by ID and provides hooks for eviction logic, such as releasing descriptor slots and deallocating GPU memory. // Currently, eviction is purely LRU-based. In the future, eviction decisions may incorporate additional factors: // - memory usage per image. -// - lastUsedSemaphoreValue. +// - lastUsedFrameIndex. // This class does not own GPU resources directly, but helps coordinate their lifetimes in sync with GPU usage via eviction callbacks. class ImagesUsageCache { diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 356ff23aa..238dbedb6 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1044,10 +1044,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu system::path m_loadCWD = ".."; std::string imagePaths[] = { - "../../media/color_space_test/R8G8B8A8_1.png", - "../../media/color_space_test/R8G8B8A8_2.png", - "../../media/color_space_test/R8G8B8_1.png", "../../media/color_space_test/R8G8B8_1.jpg", + "../../media/color_space_test/R8G8B8_1.png", + "../../media/color_space_test/R8G8B8A8_2.png", + "../../media/color_space_test/R8G8B8A8_1.png", }; for (const auto& imagePath : imagePaths) @@ -1280,8 +1280,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu const bool isCachingDraw = CacheAndReplay && m_realFrameIx == 0u && !finishedCachingDraw; if (isCachingDraw) { + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); replayCaches.push_back(drawResourcesFiller.createReplayCache()); - intendedSubmitInfo.scratchSemaphore.value++; // fake advance needed for Texture and MSDF LRU 
caches and evictions to work return; // we don't record, submit or do anything, just caching the draw resources } @@ -1454,6 +1454,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if (!inBetweenSubmit) cb->endDebugMarker(); + + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); if (inBetweenSubmit) { @@ -2903,9 +2905,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu for (uint32_t i = 0; i < sampleImages.size(); ++i) { uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from + //printf(std::format("\n Image {} \n", i).c_str()); drawResourcesFiller.addStaticImage2D(imageID, sampleImages[i], intendedNextSubmit); - drawResourcesFiller.addImageObject(imageID, { 0.0 + i * 100.0, 0.0 }, { 100.0 , 100.0 }, 0.0, intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { 0.0 + (i) * 3.0, 0.0 }, { 3.0 , 3.0 }, 0.0, intendedNextSubmit); // drawResourcesFiller.addImageObject(imageID, { 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); + //printf("\n"); } LineStyleInfo lineStyle = { @@ -2922,7 +2926,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu linePoints.push_back({ 100.0, -100.0 }); polyline.addLinePoints(linePoints); } - drawResourcesFiller.drawPolyline(polyline, lineStyle, intendedNextSubmit); + // drawResourcesFiller.drawPolyline(polyline, lineStyle, intendedNextSubmit); } else if (mode == ExampleMode::CASE_8) { From ada9c8b565ee428ea7c2077fc46624247b2c74c5 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 16 May 2025 14:31:07 +0400 Subject: [PATCH 083/129] Cache&Replay with static images --- 62_CAD/DrawResourcesFiller.cpp | 221 +++++++++++++++++++++++++-------- 62_CAD/DrawResourcesFiller.h | 25 ++-- 62_CAD/Images.h | 34 ++++- 3 files changed, 209 insertions(+), 71 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp 
b/62_CAD/DrawResourcesFiller.cpp index c7a074d2f..b386f02a8 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -63,7 +63,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, IDeviceMemoryAllocator::SAllocateInfo allocationInfo = { // TODO: Get from user side. - .size = 70 * 1024 * 1024, // 70 MB + .size = 170 * 1024 * 1024, // 70 MB .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, .memoryTypeIndex = memoryTypeIdx, .dedication = nullptr, @@ -131,7 +131,7 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); - msdfStagedCPUImages.resize(maxMSDFs); + msdfImagesState.resize(maxMSDFs); } void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit) @@ -368,14 +368,13 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma * - Ensure safe deallocation of the slot. * - Submit any pending draw calls if the evicted image was scheduled to be used in the upcoming submission. */ - auto evictionCallback = [&](const ImageReference& evicted) + auto evictionCallback = [&](image_id imageID, const ImageReference& evicted) { // Later used to release the image's memory range. 
core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; cleanupObject->addr = evicted.allocationOffset; cleanupObject->size = evicted.allocationSize; - const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); @@ -401,6 +400,10 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); } + + // erase imageID from our state map + // kindof mirrors the state of the LRUCache + staticImagesState.erase(imageID); }; // Try inserting or updating the image usage in the cache. @@ -469,12 +472,12 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); if (boundToMemorySuccessfully) { + gpuImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); IGPUImageView::SCreationParams viewParams = { .image = gpuImage, .viewType = IGPUImageView::ET_2D, .format = gpuImage->getCreationParameters().format }; - gpuImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); gpuImageView = device->createImageView(std::move(viewParams)); if (gpuImageView) { @@ -535,7 +538,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma const image_id evictionCandidate = imagesUsageCache->select_eviction_candidate(); ImageReference* imageRef = imagesUsageCache->peek(evictionCandidate); if (imageRef) - evictionCallback(*imageRef); + evictionCallback(evictionCandidate, *imageRef); imagesUsageCache->erase(evictionCandidate); while (suballocatedDescriptorSet->cull_frees()) {}; // to 
make sure deallocation requests in eviction callback are blocked for. @@ -547,14 +550,17 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma { inserted->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN - StaticImagesCopy copyToStage = + StaticImageState newState = { .cpuImage = cpuImage, .gpuImageView = gpuImageView, + .allocationOffset = inserted->allocationOffset, + .allocationSize = inserted->allocationSize, .arrayIndex = inserted->index, + .gpuResident = false, }; // printf(std::format("Everything success, ImageID={} ArrayIndex={} \n", imageID, inserted->index).c_str()); - staticImagesStagedCopies.push_back(copyToStage); + staticImagesState.emplace(imageID, newState); } else { @@ -653,15 +659,126 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit { // This means we're in a replay cache scope, use the replay cache to push to GPU instead of internal accumulation success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); - success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfStagedCPUImages); - // TODO: pushStaticImagesUploads + success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfImagesState); + + // Push Static Images Uploads from replay cache, only those who are not gpu resident + auto* device = m_utilities->getLogicalDevice(); + std::vector staticImageCopies; + for (auto& [id, replayImageState] : currentReplayCache->staticImagesState) + { + auto it = staticImagesState.find(id); + bool alreadyResident = false; + + // compare with existing state, and check whether image id is already resident. 
+ if (it != staticImagesState.end()) + { + const StaticImageState& existingState = it->second; + + const bool allocationMatches = + existingState.allocationOffset == replayImageState.allocationOffset && + existingState.allocationSize == replayImageState.allocationSize; + + const bool arrayIndexMatches = existingState.arrayIndex == replayImageState.arrayIndex; + + alreadyResident = allocationMatches && arrayIndexMatches && existingState.gpuResident; + } + + // if already resident, we don't need to do anything + if (alreadyResident) + continue; + + bool successCreateNewImage = false; + + // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads + auto existingGPUImageViewParams = replayImageState.gpuImageView->getCreationParameters(); + IGPUImage::SCreationParams imageParams = {}; + imageParams = existingGPUImageViewParams.image->getCreationParameters(); + + auto newGPUImage = device->createImage(std::move(imageParams)); + if (newGPUImage) + { + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = newGPUImage.get(), + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + replayImageState.allocationOffset } + }; + + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + newGPUImage->setObjectDebugName((std::to_string(id) + " Static Image 2D").c_str()); + IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; + viewParams.image = newGPUImage; + + auto newGPUImageView = device->createImageView(std::move(viewParams)); + if (newGPUImageView) + { + successCreateNewImage = true; + + staticImageCopies.push_back(StaticImageCopy { + .cpuImage = replayImageState.cpuImage, + .gpuImageView = newGPUImageView, + .arrayIndex = replayImageState.arrayIndex + }); + + newGPUImageView->setObjectDebugName((std::to_string(id) + 
" Static Image View 2D").c_str()); + } + + } + } + + if (!successCreateNewImage) + { + // TODO: Log + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + bool replayStaticUploadSuccess = true; + + if (staticImageCopies.size() > 0u) + { + // We need to block for previous submit in order to safely, rebind image's memory and update the descriptor set array index. + // + // [FUTURE_CONSIDERATION]: To avoid stalling the CPU when replaying caches that overflow GPU memory, + // we could recreate the image and image view, binding them to entirely new memory locations. + // This would require an indirection mechanism in the shader to remap references from cached geometry or objects to the new image array indices. + // Note: This isn't a problem if the replayed scene fits in memory and doesn't require overflow submissions due to image memory exhaustion. + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + device->blockForSemaphores({ &waitInfo, 1u }); + replayStaticUploadSuccess = pushStaticImagesUploads_Internal(intendedNextSubmit, staticImageCopies); + } + + if (replayStaticUploadSuccess) + { + staticImagesState = currentReplayCache->staticImagesState; + for (auto& [_, state] : staticImagesState) + state.gpuResident = true; + } + + success &= replayStaticUploadSuccess; } else { flushDrawObjects(); success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); - success &= pushMSDFImagesUploads(intendedNextSubmit, msdfStagedCPUImages); - success &= pushStaticImagesUploads(intendedNextSubmit); + success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); + + // Push Static Images Uploads, only those who are not gpu resident + std::vector staticImageCopies; + for (auto& [id, staticImageState] : staticImagesState) + { + if (!staticImageState.gpuResident) + staticImageCopies.push_back(StaticImageCopy{ .cpuImage = staticImageState.cpuImage, 
.gpuImageView = staticImageState.gpuImageView, .arrayIndex = staticImageState.arrayIndex }); + } + const bool staticImagesUploadSuccess = pushStaticImagesUploads_Internal(intendedNextSubmit, staticImageCopies); + if (staticImagesUploadSuccess) + { + for (auto& [id, staticImageState] : staticImagesState) + staticImageState.gpuResident = true; + } + success &= staticImagesUploadSuccess; } return success; } @@ -751,11 +868,12 @@ std::unique_ptr DrawResourcesFiller::createRep flushDrawObjects(); std::unique_ptr ret = std::unique_ptr(new ReplayCache); ret->resourcesCollection = resourcesCollection; - ret->msdfStagedCPUImages = msdfStagedCPUImages; - for (auto& stagedMSDF : ret->msdfStagedCPUImages) + ret->msdfImagesState = msdfImagesState; + for (auto& stagedMSDF : ret->msdfImagesState) stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. ret->drawCallsData = drawCalls; ret->activeMainObjectIndex = activeMainObjectIndex; + ret->staticImagesState = staticImagesState; // copy state of static images return ret; } @@ -825,7 +943,7 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub return true; } -bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages) +bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages) { auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); @@ -938,7 +1056,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } }; commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterTransferImageBarrier }); - + if (!m_hasInitializedMSDFTextureArrays) m_hasInitializedMSDFTextureArrays = true; @@ -951,29 +1069,29 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } } -bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit) +bool 
DrawResourcesFiller::pushStaticImagesUploads_Internal(SIntendedSubmitInfo& intendedNextSubmit, std::span staticImagesCopy) { - auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); - auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet(); - auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - - if (cmdBuffInfo) - { - bool success = true; + bool success = true; - if (staticImagesStagedCopies.size() > 0ull) + if (staticImagesCopy.size() > 0ull) + { + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet(); + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); + + if (cmdBuffInfo) { IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; // DescriptorSet Updates std::vector descriptorInfos; std::vector descriptorWrites; - descriptorInfos.resize(staticImagesStagedCopies.size()); - descriptorWrites.resize(staticImagesStagedCopies.size()); - for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + descriptorInfos.resize(staticImagesCopy.size()); + descriptorWrites.resize(staticImagesCopy.size()); + for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) { - auto& stagedStaticImage = staticImagesStagedCopies[i]; + auto& stagedStaticImage = staticImagesCopy[i]; // Bind gpu image view to descriptor set descriptorInfos[i].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; descriptorInfos[i].desc = stagedStaticImage.gpuImageView; @@ -989,12 +1107,12 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN success &= device->updateDescriptorSets(descriptorWrites.size(), descriptorWrites.data(), 0u, nullptr); std::vector beforeCopyImageBarriers; - beforeCopyImageBarriers.resize(staticImagesStagedCopies.size()); + beforeCopyImageBarriers.resize(staticImagesCopy.size()); // Pipeline 
Barriers before stagedStaticImage - for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) { - auto& stagedStaticImage = staticImagesStagedCopies[i]; + auto& stagedStaticImage = staticImagesCopy[i]; const auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; beforeCopyImageBarriers[i] = { @@ -1021,9 +1139,9 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) { - auto& stagedStaticImage = staticImagesStagedCopies[i]; + auto& stagedStaticImage = staticImagesCopy[i]; auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; success &= m_utilities->updateImageViaStagingBuffer( intendedNextSubmit, @@ -1035,12 +1153,12 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change std::vector afterCopyImageBarriers; - afterCopyImageBarriers.resize(staticImagesStagedCopies.size()); + afterCopyImageBarriers.resize(staticImagesCopy.size()); // Pipeline Barriers before stagedStaticImage - for (uint32_t i = 0u; i < staticImagesStagedCopies.size(); ++i) + for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) { - auto& stagedStaticImage = staticImagesStagedCopies[i]; + auto& stagedStaticImage = staticImagesCopy[i]; const auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; afterCopyImageBarriers[i] = { @@ -1067,22 +1185,19 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { 
.imgBarriers = afterCopyImageBarriers }); } - - staticImagesStagedCopies.clear(); - if (!success) + else { - // TODO: Log _NBL_DEBUG_BREAK_IF(true); + success = false; } - return success; - } - else + + if (!success) { // TODO: Log - _NBL_DEBUG_BREAK_IF(true); - return false; + _NBL_DEBUG_BREAK_IF(true); } + return success; } const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const @@ -1727,8 +1842,8 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor { // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: - // - Both the image upload (msdfStagedCPUImages) and usage occur within the same timeline (`intendedNextSubmit`). - // - timeline semaphores guarantee proper ordering: the next submit's msdfStagedCPUImages will wait on the prior usage. + // - Both the image upload (msdfImagesState) and usage occur within the same timeline (`intendedNextSubmit`). + // - timeline semaphores guarantee proper ordering: the next submit's msdfImagesState will wait on the prior usage. // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. // // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images @@ -1759,7 +1874,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor } // Clear CPU-side metadata associated with the evicted slot. 
- msdfStagedCPUImages[evicted.alloc_idx].evict(); + msdfImagesState[evicted.alloc_idx].evict(); }; // We pass nextSemaValue instead of constructing a new MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema @@ -1776,9 +1891,9 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor if (inserted->alloc_idx != IndexAllocator::AddressAllocator::invalid_address) { - // We stage msdfStagedCPUImages, pushMSDFImagesUploads will push it into GPU - msdfStagedCPUImages[inserted->alloc_idx].image = std::move(cpuImage); - msdfStagedCPUImages[inserted->alloc_idx].uploadedToGPU = false; + // We stage msdfImagesState, pushMSDFImagesUploads will push it into GPU + msdfImagesState[inserted->alloc_idx].image = std::move(cpuImage); + msdfImagesState[inserted->alloc_idx].uploadedToGPU = false; } else { diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index a9b5da172..8c95b9a09 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -304,7 +304,7 @@ struct DrawResourcesFiller /// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj uint32_t getActiveMainObjectIndex() const; - struct MSDFStagedCPUImage + struct MSDFImageState { core::smart_refctd_ptr image; bool uploadedToGPU : 1u; @@ -352,9 +352,10 @@ struct DrawResourcesFiller /// This enables efficient replays without traversing or re-generating scene content. struct ReplayCache { - ResourcesCollection resourcesCollection; - std::vector msdfStagedCPUImages; std::vector drawCallsData; + ResourcesCollection resourcesCollection; + std::vector msdfImagesState; + std::unordered_map staticImagesState; uint32_t activeMainObjectIndex = InvalidMainObjectIdx; // TODO: non msdf general CPU Images // TODO: Get total memory consumption for logging? 
@@ -389,11 +390,10 @@ struct DrawResourcesFiller bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. - bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages); + bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& msdfImagesState); /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. - /// TODO: Handle for cache&replay mode later - bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit); + bool pushStaticImagesUploads_Internal(SIntendedSubmitInfo& intendedNextSubmit, std::span staticImagesCopy); const size_t calculateRemainingResourcesSize() const; @@ -640,7 +640,7 @@ struct DrawResourcesFiller smart_refctd_ptr msdfTextureArrayIndexAllocator; std::unique_ptr msdfLRUCache; // LRU Cache to evict Least Recently Used in case of overflow - std::vector msdfStagedCPUImages = {}; // cached cpu imaged + their status, size equals to LRUCache size + std::vector msdfImagesState = {}; // cached cpu imaged + their status, size equals to LRUCache size static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; bool m_hasInitializedMSDFTextureArrays = false; @@ -649,13 +649,8 @@ struct DrawResourcesFiller smart_refctd_ptr suballocatedDescriptorSet; uint32_t imagesArrayBinding = 0u; - // static images (not streamable): - struct StaticImagesCopy - { - core::smart_refctd_ptr cpuImage; - core::smart_refctd_ptr gpuImageView; - uint32_t arrayIndex; - }; - std::vector staticImagesStagedCopies; + // TODO: consider removing this and just using the `imagesUsageCache` and `ImageReference` when `core::ResizableLRUCache` is copyable and iterable + // Current state of the static images, used in `pushStaticImagesUploads` to make StaticImages `gpuResident` and bind them to correct array 
index + std::unordered_map staticImagesState; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 7c9609161..d8c6cf864 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -84,6 +84,27 @@ struct ImageCleanup : public core::IReferenceCounted }; +struct StaticImageCopy +{ + core::smart_refctd_ptr cpuImage; + core::smart_refctd_ptr gpuImageView; + uint32_t arrayIndex; +}; + +// TODO: consider just using the ImagesUsageCache to store this StaticImagesState, i.e. merge this struct with the ImageReference +// it will be possible after LRUCache improvements and copyability +// for now this will be a mirror of the LRUCache but in an unordered_map +struct StaticImageState +{ + core::smart_refctd_ptr cpuImage = nullptr; + core::smart_refctd_ptr gpuImageView = nullptr; + uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; + uint64_t allocationSize = 0u; + uint32_t arrayIndex = ~0u; // in texture array descriptor + bool gpuResident = false; +}; + + struct ImageReference { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; @@ -127,10 +148,17 @@ class ImagesUsageCache // Attempts to insert a new image into the cache. // If the cache is full, invokes the provided `evictCallback` to evict an image. // Returns a pointer to the inserted or existing ImageReference. - template EvictionCallback> + template EvictionCallback> inline ImageReference* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) { - return lruCache.insert(imageID, lastUsedSema, std::forward(evictCallback)); + auto lruEvictionCallback = [&](const ImageReference& evicted) + { + const image_id* evictingKey = lruCache.get_least_recently_used(); + assert(evictingKey != nullptr); + if (evictingKey) + evictCallback(*evictingKey, evicted); + }; + return lruCache.insert(imageID, lastUsedSema, lruEvictionCallback); } // Retrieves the image associated with `imageID`, updating its LRU position. 
@@ -158,7 +186,7 @@ class ImagesUsageCache { // we shouldn't select eviction candidate if lruCache is empty _NBL_DEBUG_BREAK_IF(true); - return 0ull; + return ~0ull; } } From 2632c3abe127a226c1593a47491a381f32762680 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sun, 18 May 2025 14:59:00 +0400 Subject: [PATCH 084/129] use OrientedBoundingBox2D for images --- 62_CAD/DrawResourcesFiller.cpp | 8 ++++---- 62_CAD/DrawResourcesFiller.h | 2 +- 62_CAD/main.cpp | 3 +-- 62_CAD/shaders/globals.hlsl | 8 ++++++++ 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index b386f02a8..f50e8f317 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -623,16 +623,16 @@ void DrawResourcesFiller::drawGridDTM( endMainObject(); } -void DrawResourcesFiller::addImageObject(image_id imageID, float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit) { beginMainObject(MainObjectType::IMAGE); uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); ImageObjectInfo info = {}; - info.topLeft = topLeftPos; - info.dirU = float32_t2(size.x * cos(rotation), size.x * sin(rotation)); // - info.aspectRatio = size.y / size.x; + info.topLeft = obb.topLeft; + info.dirU = obb.dirU; + info.aspectRatio = obb.aspectRatio; info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory if (!addImageObject_Internal(info, mainObjIdx)) { diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 8c95b9a09..5501e4c84 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -246,7 +246,7 @@ struct DrawResourcesFiller 
uint32_t addStaticImage2D(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `addStaticImage` for the same imageID. - void addImageObject(image_id imageID, float64_t2 topLeftPos, float32_t2 size, float32_t rotation, SIntendedSubmitInfo& intendedNextSubmit); + void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 238dbedb6..1394bf719 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -2907,8 +2907,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from //printf(std::format("\n Image {} \n", i).c_str()); drawResourcesFiller.addStaticImage2D(imageID, sampleImages[i], intendedNextSubmit); - drawResourcesFiller.addImageObject(imageID, { 0.0 + (i) * 3.0, 0.0 }, { 3.0 , 3.0 }, 0.0, intendedNextSubmit); - // drawResourcesFiller.addImageObject(imageID, { 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { .topLeft = { 0.0 + (i) * 3.0, 0.0 }, .dirU = { 3.0 , 0.0 }, .aspectRatio = 1.0 }, intendedNextSubmit); //printf("\n"); } LineStyleInfo lineStyle = diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 538387491..0280b5881 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -528,6 +528,14 @@ NBL_CONSTEXPR float MSDFSize = 32.0f; NBL_CONSTEXPR uint32_t MSDFMips = 4; NBL_CONSTEXPR float HatchFillMSDFSceenSpaceSize = 8.0; +// Used in CPU-side 
only for now +struct OrientedBoundingBox2D +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + float32_t2 dirU; // 2 * 4 = 8 bytes (24) + float32_t aspectRatio; // 4 bytes (28) +}; + #ifdef __HLSL_VERSION [[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); From e2fef3a8627437c307083651d67b40819d71d7d8 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 20 May 2025 12:08:04 +0400 Subject: [PATCH 085/129] start Georeferenced Images work and refactoring similar functionality with Static Images --- 62_CAD/DrawResourcesFiller.cpp | 474 +++++++++++++++++++++------------ 62_CAD/DrawResourcesFiller.h | 49 ++++ 62_CAD/Images.h | 24 +- 3 files changed, 378 insertions(+), 169 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index f50e8f317..b0b3306ae 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -362,65 +362,22 @@ void DrawResourcesFiller::drawFontGlyph( uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { - /* - * The `suballocatedDescriptorSet` manages indices (slots) into a array of textures binding. - * This callback is invoked on eviction, and must: - * - Ensure safe deallocation of the slot. - * - Submit any pending draw calls if the evicted image was scheduled to be used in the upcoming submission. - */ - auto evictionCallback = [&](image_id imageID, const ImageReference& evicted) - { - // Later used to release the image's memory range. 
- core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); - cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; - cleanupObject->addr = evicted.allocationOffset; - cleanupObject->size = evicted.allocationSize; - - const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); - - // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. - if (imageUsedForNextIntendedSubmit) - { - // The evicted image is scheduled for use in the upcoming submit. - // To avoid rendering artifacts, we must flush the current draw queue now. - // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. - submitDraws(intendedNextSubmit); - reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded - - // Prepare wait info to defer index deallocation until the GPU has finished using the resource. - // we wait on the signal semaphore for the submit we just did above. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); - } - else - { - // The image is not used in the current frame, so we can deallocate without submitting any draws. - // Still wait on the semaphore to ensure past GPU usage is complete. - // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit. 
- ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.index, deallocationWaitInfo, &cleanupObject.get()); - } - - // erase imageID from our state map - // kindof mirrors the state of the LRUCache - staticImagesState.erase(imageID); - }; - // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. - ImageReference* inserted = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictionCallback); + auto evictCallback = [&](image_id imageID, const ImageReference& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + ImageReference* inserted = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN // if inserted->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx - if (inserted->index == InvalidTextureIndex) + if (inserted->arrayIndex == InvalidTextureIndex) { // This is a new image (cache miss). Allocate a descriptor index for it. - inserted->index = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + inserted->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
- suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &inserted->index); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &inserted->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - if (inserted->index != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + if (inserted->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { auto* device = m_utilities->getLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -439,116 +396,16 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // Attempt to create a GPU image and image view for this texture. core::smart_refctd_ptr gpuImageView = nullptr; + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, intendedNextSubmit, std::to_string(imageID)); - // Attempt to create a GPU image and corresponding image view for this texture. - // If creation or memory allocation fails (likely due to VRAM exhaustion), - // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-inserted image remains. 
- while (imagesUsageCache->size() > 0u) - { - // Try creating the image and allocating memory for it: - auto gpuImage = device->createImage(std::move(imageParams)); - - if (gpuImage) - { - nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuImageMemoryRequirements = gpuImage->getMemoryReqs(); - uint32_t actualAlignment = 1u << gpuImageMemoryRequirements.alignmentLog2; - const bool imageMemoryRequirementsMatch = - (physDev->getDeviceLocalMemoryTypeBits() & gpuImageMemoryRequirements.memoryTypeBits) != 0 && // should have device local memory compatible - (gpuImageMemoryRequirements.requiresDedicatedAllocation == false) && // should not require dedicated allocation - ((ImagesMemorySubAllocator::MaxMemoryAlignment % actualAlignment) == 0u); // should be consistent with our suballocator's max alignment - - if (imageMemoryRequirementsMatch) - { - inserted->allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); - const bool allocationFromImagesMemoryArenaSuccessfull = inserted->allocationOffset != ImagesMemorySubAllocator::InvalidAddress; - if (allocationFromImagesMemoryArenaSuccessfull) - { - inserted->allocationSize = gpuImageMemoryRequirements.size; - nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = - { - .image = gpuImage.get(), - .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + inserted->allocationOffset } - }; - const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); - if (boundToMemorySuccessfully) - { - gpuImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); - IGPUImageView::SCreationParams viewParams = { - .image = gpuImage, - .viewType = IGPUImageView::ET_2D, - .format = gpuImage->getCreationParameters().format - }; - gpuImageView = device->createImageView(std::move(viewParams)); - if (gpuImageView) - { - // SUCCESS! 
- gpuImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); - } - else - { - // irrecoverable error if simple image creation fails. - // TODO[LOG]: that's rare, image view creation failed. - _NBL_DEBUG_BREAK_IF(true); - } - - // succcessful with everything, just break and get out of this retry loop - break; - } - else - { - // irrecoverable error if simple bindImageMemory fails. - // TODO: LOG - _NBL_DEBUG_BREAK_IF(true); - break; - } - } - else - { - // printf(std::format("Allocation Failed, Trying again, ImageID={} Size={} \n", imageID, gpuImageMemoryRequirements.size).c_str()); - // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry - } - } - else - { - // irrecoverable error if memory requirements of the image don't match our preallocated devicememory - // TODO: LOG - _NBL_DEBUG_BREAK_IF(true); - break; - } - } - else - { - // irrecoverable error if simple image creation fails. - // TODO: LOG - _NBL_DEBUG_BREAK_IF(true); - break; - } - - // Getting here means we failed creating or allocating the image, evict and retry. - if (imagesUsageCache->size() == 1u) - { - // Nothing else to evict; give up. - // We probably have evicted almost every other texture except the one we just allocated an index for - _NBL_DEBUG_BREAK_IF(true); - break; - } - - assert(imagesUsageCache->size() > 1u); - - const image_id evictionCandidate = imagesUsageCache->select_eviction_candidate(); - ImageReference* imageRef = imagesUsageCache->peek(evictionCandidate); - if (imageRef) - evictionCallback(evictionCandidate, *imageRef); - imagesUsageCache->erase(evictionCandidate); - while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. 
- - // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference - // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. - } - - if (gpuImageView) + if (allocResults.isValid()) { + inserted->imageType = ImageType::STATIC; + inserted->gpuResident = false; inserted->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + inserted->allocationOffset = allocResults.allocationOffset; + inserted->allocationSize = allocResults.allocationSize; + inserted->gpuImageView = allocResults.gpuImageView; StaticImageState newState = { @@ -556,15 +413,14 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma .gpuImageView = gpuImageView, .allocationOffset = inserted->allocationOffset, .allocationSize = inserted->allocationSize, - .arrayIndex = inserted->index, + .arrayIndex = inserted->arrayIndex, .gpuResident = false, }; - // printf(std::format("Everything success, ImageID={} ArrayIndex={} \n", imageID, inserted->index).c_str()); staticImagesState.emplace(imageID, newState); } else { - // All attempts to create the GPU image and its corresponding view have failed. + // All attempts to try create the GPU image and its corresponding view have failed. // Most likely cause: insufficient GPU memory or unsupported image parameters. // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. _NBL_DEBUG_BREAK_IF(true); @@ -577,26 +433,155 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma imagesMemorySubAllocator->deallocate(inserted->allocationOffset, inserted->allocationSize); } - if (inserted->index != InvalidTextureIndex) + if (inserted->arrayIndex != InvalidTextureIndex) { // We previously allocated a descriptor index, but failed to create a usable GPU image. 
// It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &inserted->index, {}); - inserted->index = InvalidTextureIndex; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &inserted->arrayIndex, {}); + inserted->arrayIndex = InvalidTextureIndex; } } } else { // TODO: log here, index allocation failed. - inserted->index = InvalidTextureIndex; + inserted->arrayIndex = InvalidTextureIndex; } } - assert(inserted->index != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + assert(inserted->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed - return inserted->index; + return inserted->arrayIndex; +} + +uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) +{ + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
+ auto evictCallback = [&](image_id imageID, const ImageReference& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + ImageReference* inserted = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + + // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE + IGPUImage::SCreationParams imageCreationParams = {}; + ImageType georeferenceImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + + assert(georeferenceImageType != ImageType::STATIC); + + // imageParams = cpuImage->getCreationParameters(); + imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageCreationParams.usage) + }; + imageCreationParams.format = physDev->promoteImageFormat(request,imageCreationParams.tiling); + } + + // if inserted->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // in which case we don't queue anything for upload, and return the idx + if (inserted->arrayIndex == InvalidTextureIndex) + { + // This is a new image (cache miss). Allocate a descriptor index for it. + inserted->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
+ suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &inserted->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + + if (inserted->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + { + // Attempt to create a GPU image and image view for this texture. + core::smart_refctd_ptr gpuImageView = nullptr; + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, intendedNextSubmit, std::to_string(imageID)); + + if (allocResults.isValid()) + { + inserted->imageType = georeferenceImageType; + inserted->gpuResident = false; + inserted->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + inserted->allocationOffset = allocResults.allocationOffset; + inserted->allocationSize = allocResults.allocationSize; + inserted->gpuImageView = allocResults.gpuImageView; + + // TODO: queue update of the set with the gpu image view. + } + else + { + // All attempts to try create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. + // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. + _NBL_DEBUG_BREAK_IF(true); + + if (inserted->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + { + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(inserted->allocationOffset, inserted->allocationSize); + } + + if (inserted->arrayIndex != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. + // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. 
+ // No semaphore wait needed here, as the GPU never got to use this slot. + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &inserted->arrayIndex, {}); + inserted->arrayIndex = InvalidTextureIndex; + } + } + } + else + { + // TODO: log here, index allocation failed. + inserted->arrayIndex = InvalidTextureIndex; + } + } + else + { + // found in cache, but does it require resize? recreation? + if (inserted->gpuImageView) + { + auto imgViewParams = inserted->gpuImageView->getCreationParameters(); + if (imgViewParams.image) + { + const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); + const auto cachedImageType = inserted->imageType; + // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus + const auto currentParams = static_cast(imageCreationParams); + const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; + if (needsRecreation) + { + // We need to evict the image. 
+ // Find erase the id from the cache, call evictCallback + // wait for the image usage sempahore to finish (later we reallocate and reindex to avoid this) + // try recreating the image (the same try process) + // get the index hopefully from the creation + } + } + else + { + // TODO[LOG] + } + } + else + { + // TODO[LOG] + } + } + + assert(inserted->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + + return inserted->arrayIndex; + // update frame idx + // if found: + // check if needs recreation/resize, if it does, recreate + // if not, return set index + // if not found + // do the recreation process: TRY {create image, allocate and bind memory, create image view}, success --> queue for descriptor set update } // TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo @@ -1786,12 +1771,169 @@ uint32_t DrawResourcesFiller::getImageIndexFromID(image_id imageID, const SInten ImageReference* imageRef = imagesUsageCache->get(imageID); if (imageRef) { - textureIdx = imageRef->index; + textureIdx = imageRef->arrayIndex; imageRef->lastUsedFrameIndex = currentFrameIndex; // update this because the texture will get used on the next frane } return textureIdx; } +void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const ImageReference& evicted, SIntendedSubmitInfo& intendedNextSubmit) +{ + // Later used to release the image's memory range. 
+ core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); + cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; + cleanupObject->addr = evicted.allocationOffset; + cleanupObject->size = evicted.allocationSize; + + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); + + // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. + if (imageUsedForNextIntendedSubmit) + { + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } + else + { + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit. 
+ ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } + + // erase imageID from our state map + // kindof mirrors the state of the LRUCache for static images + if (evicted.imageType == ImageType::STATIC) + staticImagesState.erase(imageID); +} + +DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName) +{ + ImageAllocateResults ret = {}; + + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + // Attempt to create a GPU image and corresponding image view for this texture. + // If creation or memory allocation fails (likely due to VRAM exhaustion), + // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-inserted image remains. 
+ while (imagesUsageCache->size() > 0u) + { + // Try creating the image and allocating memory for it: + nbl::video::IGPUImage::SCreationParams params = {}; + params = imageParams; + auto gpuImage = device->createImage(std::move(params)); + + if (gpuImage) + { + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuImageMemoryRequirements = gpuImage->getMemoryReqs(); + uint32_t actualAlignment = 1u << gpuImageMemoryRequirements.alignmentLog2; + const bool imageMemoryRequirementsMatch = + (physDev->getDeviceLocalMemoryTypeBits() & gpuImageMemoryRequirements.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuImageMemoryRequirements.requiresDedicatedAllocation == false) && // should not require dedicated allocation + ((ImagesMemorySubAllocator::MaxMemoryAlignment % actualAlignment) == 0u); // should be consistent with our suballocator's max alignment + + if (imageMemoryRequirementsMatch) + { + ret.allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); + const bool allocationFromImagesMemoryArenaSuccessfull = ret.allocationOffset != ImagesMemorySubAllocator::InvalidAddress; + if (allocationFromImagesMemoryArenaSuccessfull) + { + ret.allocationSize = gpuImageMemoryRequirements.size; + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = gpuImage.get(), + .binding = { .memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + ret.allocationOffset } + }; + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + gpuImage->setObjectDebugName(imageDebugName.c_str()); + IGPUImageView::SCreationParams viewParams = { + .image = gpuImage, + .viewType = IGPUImageView::ET_2D, + .format = gpuImage->getCreationParameters().format + }; + ret.gpuImageView = device->createImageView(std::move(viewParams)); + if (ret.gpuImageView) + { + // SUCCESS! 
+ ret.gpuImageView->setObjectDebugName((imageDebugName + " View").c_str()); + } + else + { + // irrecoverable error if simple image creation fails. + // TODO[LOG]: that's rare, image view creation failed. + _NBL_DEBUG_BREAK_IF(true); + } + + // succcessful with everything, just break and get out of this retry loop + break; + } + else + { + // irrecoverable error if simple bindImageMemory fails. + // TODO: LOG + _NBL_DEBUG_BREAK_IF(true); + break; + } + } + else + { + // printf(std::format("Allocation Failed, Trying again, ImageID={} Size={} \n", imageID, gpuImageMemoryRequirements.size).c_str()); + // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry + } + } + else + { + // irrecoverable error if memory requirements of the image don't match our preallocated devicememory + // TODO: LOG + _NBL_DEBUG_BREAK_IF(true); + break; + } + } + else + { + // irrecoverable error if simple image creation fails. + // TODO: LOG + _NBL_DEBUG_BREAK_IF(true); + break; + } + + // Getting here means we failed creating or allocating the image, evict and retry. + if (imagesUsageCache->size() == 1u) + { + // Nothing else to evict; give up. + // We probably have evicted almost every other texture except the one we just allocated an index for + _NBL_DEBUG_BREAK_IF(true); + break; + } + + assert(imagesUsageCache->size() > 1u); + + const image_id evictionCandidate = imagesUsageCache->select_eviction_candidate(); + ImageReference* imageRef = imagesUsageCache->peek(evictionCandidate); + if (imageRef) + evictImage_SubmitIfNeeded(evictionCandidate, *imageRef, intendedNextSubmit); + imagesUsageCache->erase(evictionCandidate); + while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. 
+ + // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference + // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. + } + + return ret; +} + void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) { getGlyphMSDF = func; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 5501e4c84..f805c0a82 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -245,6 +245,8 @@ struct DrawResourcesFiller */ uint32_t addStaticImage2D(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + uint32_t retrieveGeoreferencedImage_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + // This function must be called immediately after `addStaticImage` for the same imageID. void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); @@ -465,6 +467,53 @@ struct DrawResourcesFiller uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); + /** + * @brief Evicts a GPU image and deallocates its associated descriptor and memory, flushing draws if needed. + * + * This function is called when an image must be removed from GPU memory (typically due to VRAM pressure). + * If the evicted image is scheduled to be used in the next draw submission, a flush is performed to avoid + * use-after-free issues. Otherwise, it proceeds with deallocation immediately. + * + * It prepares a cleanup object that ensures the memory range used by the image will be returned to the suballocator + * only after the GPU has finished using it, guarded by a semaphore wait. + * + * @param imageID The unique ID of the image being evicted. 
+ * @param evicted A reference to the evicted image, containing metadata such as allocation offset, size, usage frame, etc. + * @param intendedNextSubmit Reference to the intended submit information. Used for synchronizing draw submission and safe deallocation. + * + * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. [future todo: fix] + */ + void evictImage_SubmitIfNeeded(image_id imageID, const ImageReference& evicted, SIntendedSubmitInfo& intendedNextSubmit); + + struct ImageAllocateResults + { + nbl::core::smart_refctd_ptr gpuImageView = nullptr; + uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; + uint64_t allocationSize = 0ull; + bool isValid() const { return (gpuImageView && (allocationOffset != ImagesMemorySubAllocator::InvalidAddress)); } + }; + + /** + * @brief Attempts to create and allocate a GPU image and its view, with fallback eviction on failure. + * + * This function tries to create a GPU image using the specified creation parameters, allocate memory + * from the shared image memory arena, bind it to device-local memory, and create an associated image view. + * If memory allocation fails (e.g. due to VRAM exhaustion), the function will evict textures from the internal + * LRU cache and retry the operation until successful, or until only the currently-inserted image remains. + * + * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. + * + * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. + * @param intendedNextSubmit Reference to the current intended submit info. Used for synchronizing evictions. + * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. + * + * @return ImageAllocateResults A struct containing: + * - `allocationOffset`: Offset into the memory arena (or InvalidAddress on failure). 
+ * - `allocationSize`: Size of the allocated memory region. + * - `gpuImageView`: The created GPU image view (nullptr if creation failed). + */ + ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string debugName = "UnnamedNablaImage"); + void resetMainObjects() { resourcesCollection.mainObjects.vector.clear(); diff --git a/62_CAD/Images.h b/62_CAD/Images.h index d8c6cf864..d93c47d3c 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -6,6 +6,20 @@ using namespace nbl::asset; using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. +enum class ImageType : uint8_t +{ + STATIC = 0, // Regular non-georeferenced image, fully loaded once + GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view + GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant +}; + +struct GeoreferencedImageParams +{ + uint32_t2 imageExtents; + uint32_t2 viewportExtents; + asset::E_FORMAT format; +}; + /** * @class ImagesMemorySubAllocator * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. 
@@ -108,13 +122,17 @@ struct StaticImageState struct ImageReference { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; - uint32_t index = InvalidTextureIndex; // index in our array of textures binding + + uint32_t arrayIndex = InvalidTextureIndex; // index in our array of textures binding + ImageType imageType; + bool gpuResident = false; uint64_t lastUsedFrameIndex = 0ull; // last used semaphore value on this image uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; uint64_t allocationSize = 0ull; + core::smart_refctd_ptr gpuImageView = nullptr; ImageReference() - : index(InvalidTextureIndex) + : arrayIndex(InvalidTextureIndex) , lastUsedFrameIndex(0ull) , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) , allocationSize(0ull) @@ -122,7 +140,7 @@ struct ImageReference // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value ImageReference(uint64_t currentFrameIndex) - : index(InvalidTextureIndex) + : arrayIndex(InvalidTextureIndex) , lastUsedFrameIndex(currentFrameIndex) , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) , allocationSize(0ull) From a7143525763141d4f3ec05511fd15bae92c5e60c Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 20 May 2025 12:12:35 +0400 Subject: [PATCH 086/129] small fix --- 62_CAD/DrawResourcesFiller.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index b0b3306ae..c33356cc2 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -395,7 +395,6 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma } // Attempt to create a GPU image and image view for this texture. 
- core::smart_refctd_ptr gpuImageView = nullptr; ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, intendedNextSubmit, std::to_string(imageID)); if (allocResults.isValid()) @@ -410,7 +409,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma StaticImageState newState = { .cpuImage = cpuImage, - .gpuImageView = gpuImageView, + .gpuImageView = allocResults->gpuImageView, .allocationOffset = inserted->allocationOffset, .allocationSize = inserted->allocationSize, .arrayIndex = inserted->arrayIndex, From 596751c0b9c212393e94bde8c084aaf1f7f81b34 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 20 May 2025 12:13:04 +0400 Subject: [PATCH 087/129] small fix2 --- 62_CAD/DrawResourcesFiller.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c33356cc2..b3ac66ce6 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -409,9 +409,9 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma StaticImageState newState = { .cpuImage = cpuImage, - .gpuImageView = allocResults->gpuImageView, - .allocationOffset = inserted->allocationOffset, - .allocationSize = inserted->allocationSize, + .gpuImageView = allocResults.gpuImageView, + .allocationOffset = allocResults.allocationOffset, + .allocationSize = allocResults.allocationSize, .arrayIndex = inserted->arrayIndex, .gpuResident = false, }; From 2cbc2b068c7893e9efff9a90c4cd241506b15ed1 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 20 May 2025 15:11:57 +0400 Subject: [PATCH 088/129] [Untested] more work on Georeferenced images creation and recreating/resizing --- 62_CAD/DrawResourcesFiller.cpp | 210 +++++++++++++++++++-------------- 62_CAD/DrawResourcesFiller.h | 13 ++ 62_CAD/Images.h | 19 +-- 3 files changed, 147 insertions(+), 95 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp 
b/62_CAD/DrawResourcesFiller.cpp index b3ac66ce6..9d638a920 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -365,19 +365,19 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. auto evictCallback = [&](image_id imageID, const ImageReference& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - ImageReference* inserted = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + ImageReference* cachedImageReference = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + cachedImageReference->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN - // if inserted->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // if cachedImageReference->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx - if (inserted->arrayIndex == InvalidTextureIndex) + if (cachedImageReference->arrayIndex == InvalidTextureIndex) { // This is a new image (cache miss). Allocate a descriptor index for it. - inserted->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + cachedImageReference->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
- suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &inserted->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageReference->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - if (inserted->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + if (cachedImageReference->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { auto* device = m_utilities->getLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -399,12 +399,12 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma if (allocResults.isValid()) { - inserted->imageType = ImageType::STATIC; - inserted->gpuResident = false; - inserted->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN - inserted->allocationOffset = allocResults.allocationOffset; - inserted->allocationSize = allocResults.allocationSize; - inserted->gpuImageView = allocResults.gpuImageView; + cachedImageReference->imageType = ImageType::STATIC; + cachedImageReference->gpuResident = false; + cachedImageReference->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageReference->allocationOffset = allocResults.allocationOffset; + cachedImageReference->allocationSize = allocResults.allocationSize; + cachedImageReference->gpuImageView = allocResults.gpuImageView; StaticImageState newState = { @@ -412,7 +412,7 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma .gpuImageView = allocResults.gpuImageView, .allocationOffset = allocResults.allocationOffset, .allocationSize = 
allocResults.allocationSize, - .arrayIndex = inserted->arrayIndex, + .arrayIndex = cachedImageReference->arrayIndex, .gpuResident = false, }; staticImagesState.emplace(imageID, newState); @@ -424,34 +424,37 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. _NBL_DEBUG_BREAK_IF(true); - if (inserted->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + if (cachedImageReference->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) { // We previously successfully create and allocated memory for the Image // but failed to bind and create image view // It's crucial to deallocate the offset+size form our images memory suballocator - imagesMemorySubAllocator->deallocate(inserted->allocationOffset, inserted->allocationSize); + imagesMemorySubAllocator->deallocate(cachedImageReference->allocationOffset, cachedImageReference->allocationSize); } - if (inserted->arrayIndex != InvalidTextureIndex) + if (cachedImageReference->arrayIndex != InvalidTextureIndex) { // We previously allocated a descriptor index, but failed to create a usable GPU image. // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &inserted->arrayIndex, {}); - inserted->arrayIndex = InvalidTextureIndex; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageReference->arrayIndex, {}); + cachedImageReference->arrayIndex = InvalidTextureIndex; } } } else { // TODO: log here, index allocation failed. 
- inserted->arrayIndex = InvalidTextureIndex; + cachedImageReference->arrayIndex = InvalidTextureIndex; } } - assert(inserted->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + assert(cachedImageReference->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageReference->lastUsedFrameIndex = currentFrameIndex; - return inserted->arrayIndex; + return cachedImageReference->arrayIndex; } uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) @@ -462,12 +465,12 @@ uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_ // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. 
auto evictCallback = [&](image_id imageID, const ImageReference& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - ImageReference* inserted = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + ImageReference* cachedImageReference = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE IGPUImage::SCreationParams imageCreationParams = {}; - ImageType georeferenceImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + ImageType georeferenceImageType; + determineGeoreferencedImageCreationParams(imageCreationParams, georeferenceImageType, params); assert(georeferenceImageType != ImageType::STATIC); @@ -481,30 +484,65 @@ uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_ }; imageCreationParams.format = physDev->promoteImageFormat(request,imageCreationParams.tiling); } + + // if cachedImageReference->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // But we need to check if the cached image needs resizing/recreation. + if (cachedImageReference->arrayIndex != InvalidTextureIndex) + { + // found in cache, but does it require resize? recreation? 
+ if (cachedImageReference->gpuImageView) + { + auto imgViewParams = cachedImageReference->gpuImageView->getCreationParameters(); + if (imgViewParams.image) + { + const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); + const auto cachedImageType = cachedImageReference->imageType; + // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus + const auto currentParams = static_cast(imageCreationParams); + const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; + if (needsRecreation) + { + // call the eviction callbacl so the currently cached imageID gets eventually deallocated from memory arena. + evictCallback(imageID, *cachedImageReference); + + // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + *cachedImageReference = ImageReference(currentFrameIndex); + // imagesUsageCache->erase(imageID); + // cachedImageReference = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + } + } + else + { + // TODO[LOG] + } + } + else + { + // TODO[LOG] + } + } - // if inserted->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx - if (inserted->arrayIndex == InvalidTextureIndex) + if (cachedImageReference->arrayIndex == InvalidTextureIndex) { // This is a new image (cache miss). Allocate a descriptor index for it. - inserted->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + cachedImageReference->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
- suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &inserted->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageReference->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - if (inserted->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + if (cachedImageReference->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { // Attempt to create a GPU image and image view for this texture. - core::smart_refctd_ptr gpuImageView = nullptr; ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, intendedNextSubmit, std::to_string(imageID)); if (allocResults.isValid()) { - inserted->imageType = georeferenceImageType; - inserted->gpuResident = false; - inserted->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN - inserted->allocationOffset = allocResults.allocationOffset; - inserted->allocationSize = allocResults.allocationSize; - inserted->gpuImageView = allocResults.gpuImageView; + cachedImageReference->imageType = georeferenceImageType; + cachedImageReference->gpuResident = false; + cachedImageReference->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageReference->allocationOffset = allocResults.allocationOffset; + cachedImageReference->allocationSize = allocResults.allocationSize; + cachedImageReference->gpuImageView = allocResults.gpuImageView; // TODO: queue update of the set with the gpu image view. 
} @@ -515,72 +553,37 @@ uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_ // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. _NBL_DEBUG_BREAK_IF(true); - if (inserted->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + if (cachedImageReference->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) { // We previously successfully create and allocated memory for the Image // but failed to bind and create image view // It's crucial to deallocate the offset+size form our images memory suballocator - imagesMemorySubAllocator->deallocate(inserted->allocationOffset, inserted->allocationSize); + imagesMemorySubAllocator->deallocate(cachedImageReference->allocationOffset, cachedImageReference->allocationSize); } - if (inserted->arrayIndex != InvalidTextureIndex) + if (cachedImageReference->arrayIndex != InvalidTextureIndex) { // We previously allocated a descriptor index, but failed to create a usable GPU image. // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &inserted->arrayIndex, {}); - inserted->arrayIndex = InvalidTextureIndex; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageReference->arrayIndex, {}); + cachedImageReference->arrayIndex = InvalidTextureIndex; } } } else { // TODO: log here, index allocation failed. - inserted->arrayIndex = InvalidTextureIndex; - } - } - else - { - // found in cache, but does it require resize? recreation? 
- if (inserted->gpuImageView) - { - auto imgViewParams = inserted->gpuImageView->getCreationParameters(); - if (imgViewParams.image) - { - const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); - const auto cachedImageType = inserted->imageType; - // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus - const auto currentParams = static_cast(imageCreationParams); - const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; - if (needsRecreation) - { - // We need to evict the image. - // Find erase the id from the cache, call evictCallback - // wait for the image usage sempahore to finish (later we reallocate and reindex to avoid this) - // try recreating the image (the same try process) - // get the index hopefully from the creation - } - } - else - { - // TODO[LOG] - } - } - else - { - // TODO[LOG] + cachedImageReference->arrayIndex = InvalidTextureIndex; } } + + assert(cachedImageReference->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed - assert(inserted->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageReference->lastUsedFrameIndex = currentFrameIndex; - return inserted->arrayIndex; - // update frame idx - // if found: - // check if needs recreation/resize, if it does, recreate - // if not, return set index - // if not found - // do the recreation process: TRY {create image, allocate and bind memory, create image view}, success --> queue for descriptor set update + return cachedImageReference->arrayIndex; } // TODO[Przemek]: similar to other drawXXX and 
drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo @@ -1778,6 +1781,11 @@ uint32_t DrawResourcesFiller::getImageIndexFromID(image_id imageID, const SInten void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const ImageReference& evicted, SIntendedSubmitInfo& intendedNextSubmit) { + if (evicted.arrayIndex == InvalidTextureIndex) + { + _NBL_DEBUG_BREAK_IF(true); // shouldn't happen under normal circumstances, TODO: LOG warning + return; + } // Later used to release the image's memory range. core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; @@ -1804,7 +1812,7 @@ void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const Imag { // The image is not used in the current frame, so we can deallocate without submitting any draws. // Still wait on the semaphore to ensure past GPU usage is complete. - // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value conservatively, which is the immediate prev submit. ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); } @@ -1824,7 +1832,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllo // Attempt to create a GPU image and corresponding image view for this texture. 
// If creation or memory allocation fails (likely due to VRAM exhaustion), - // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-inserted image remains. + // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-cachedImageReference image remains. while (imagesUsageCache->size() > 0u) { // Try creating the image and allocating memory for it: @@ -1933,6 +1941,36 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllo return ret; } +void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams) +{ + // Decide whether the image can reside fully into memory rather than get streamed. + // TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not + const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; + + if (betterToResideFullyInMem) + outImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + else + outImageType = ImageType::GEOREFERENCED_STREAMED; + + outImageParams.type = asset::IImage::ET_2D; + outImageParams.samples = asset::IImage::ESCF_1_BIT; + outImageParams.format = georeferencedImageParams.format; + + if (outImageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + { + outImageParams.extent = { georeferencedImageParams.imageExtents.x, georeferencedImageParams.imageExtents.y, 1u }; + } + else + { + // TODO: Better Logic, area around the view, etc... 
+ outImageParams.extent = { georeferencedImageParams.viewportExtents.x, georeferencedImageParams.viewportExtents.y, 1u }; + } + + + outImageParams.mipLevels = 1u; // TODO: Later do mipmapping + outImageParams.arrayLayers = 1u; +} + void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) { getGlyphMSDF = func; @@ -2023,7 +2061,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN - // if inserted->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx + // if cachedImageReference->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx if (inserted->alloc_idx == InvalidTextureIndex) { // New insertion == cache miss happened and insertion was successfull diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index f805c0a82..ae071654a 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -514,6 +514,19 @@ struct DrawResourcesFiller */ ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string debugName = "UnnamedNablaImage"); + /** + * @brief Determines creation parameters for a georeferenced image based on heuristics. + * + * This function decides whether a georeferenced image should be treated as a fully resident GPU texture + * or as a streamable image based on the relationship between its total resolution and the viewport size. + * It then fills out the appropriate Nabla image creation parameters. 
+ * + * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). + * @param[out] outImageType Indicates whether the image should be fully resident or streamed. + * @param[in] georeferencedImageParams Parameters describing the full image extents, viewport extents, and format. + */ + void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams); + void resetMainObjects() { resourcesCollection.mainObjects.vector.clear(); diff --git a/62_CAD/Images.h b/62_CAD/Images.h index d93c47d3c..fe3e8bde9 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -8,7 +8,8 @@ using image_id = uint64_t; // Could later be templated or replaced with a strong enum class ImageType : uint8_t { - STATIC = 0, // Regular non-georeferenced image, fully loaded once + INVALID = 0, + STATIC, // Regular non-georeferenced image, fully loaded once GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant }; @@ -124,26 +125,26 @@ struct ImageReference static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; uint32_t arrayIndex = InvalidTextureIndex; // index in our array of textures binding - ImageType imageType; + ImageType imageType = ImageType::INVALID; bool gpuResident = false; uint64_t lastUsedFrameIndex = 0ull; // last used semaphore value on this image uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; uint64_t allocationSize = 0ull; core::smart_refctd_ptr gpuImageView = nullptr; - - ImageReference() - : arrayIndex(InvalidTextureIndex) - , lastUsedFrameIndex(0ull) - , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) - , allocationSize(0ull) - {} // In LRU Cache `insert` function, in case of cache miss, we need to construct the 
refereence with semaphore value ImageReference(uint64_t currentFrameIndex) : arrayIndex(InvalidTextureIndex) + , imageType(ImageType::INVALID) + , gpuResident(false) , lastUsedFrameIndex(currentFrameIndex) , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) , allocationSize(0ull) + , gpuImageView(nullptr) + {} + + ImageReference() + : ImageReference(0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value without changing `index` From f1fb1b525bbbd8415bba0978289b82c5ee788814 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 20 May 2025 22:14:06 -0300 Subject: [PATCH 089/129] Adds Cache iteration test --- 21_LRUCacheUnitTest/main.cpp | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/21_LRUCacheUnitTest/main.cpp b/21_LRUCacheUnitTest/main.cpp index 1c63fc744..1e7830b16 100644 --- a/21_LRUCacheUnitTest/main.cpp +++ b/21_LRUCacheUnitTest/main.cpp @@ -5,6 +5,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. 
#include "nbl/application_templates/MonoSystemMonoLoggerApplication.hpp" +#include using namespace nbl; using namespace core; @@ -180,6 +181,28 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL cache3.insert(1, "bar"); cache3.clear(); + // Cache iterator test + constexpr uint32_t cache4Size = 10; + ResizableLRUCache cache4(cache4Size); + for (auto i = 0u; i < cache4Size; i++) + { + cache4.insert(i, i); + } + // Default iterator is MRU -> LRU + uint32_t counter = cache4Size - 1; + for (auto& pair : cache4) + { + assert(pair.first == counter && pair.second == counter); + counter--; + } + // Reverse LRU -> MRU traversal + counter = 0u; + for (auto it = cache4.crbegin(); it != cache4.crend(); it++) + { + assert(it->first == counter && it->second == counter); + counter++; + } + // Besides the disposal function that gets called when evicting, we need to check that the Cache properly destroys all resident `Key,Value` pairs when destroyed struct Foo { @@ -208,9 +231,9 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL int destroyCounter = 0; { - ResizableLRUCache cache4(10u); + ResizableLRUCache cache5(10u); for (int i = 0; i < 10; i++) - cache4.insert(i, Foo(&destroyCounter)); + cache5.insert(i, Foo(&destroyCounter)); int x = 0; } From 3f40b925c97ece98c72527d5c85d2593471f26c9 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 21 May 2025 07:52:41 +0400 Subject: [PATCH 090/129] small comment --- 62_CAD/DrawResourcesFiller.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 9d638a920..510faf6d2 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -1945,6 +1945,7 @@ void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset:: { // Decide whether the image can reside fully into memory rather than get streamed. 
// TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not + // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; if (betterToResideFullyInMem) From fccbcb2894941d3743021dee874dbe545ea317b5 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 21 May 2025 16:40:35 +0400 Subject: [PATCH 091/129] addGeoreferencedImage, and image cache heavy refactors (Image States) --- 62_CAD/DrawResourcesFiller.cpp | 432 ++++++++++-------- 62_CAD/DrawResourcesFiller.h | 54 ++- 62_CAD/Images.h | 103 ++--- 62_CAD/main.cpp | 11 +- 62_CAD/shaders/globals.hlsl | 20 +- .../main_pipeline/fragment_shader.hlsl | 17 +- .../shaders/main_pipeline/vertex_shader.hlsl | 23 +- 7 files changed, 396 insertions(+), 264 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 510faf6d2..425834a99 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -7,7 +7,7 @@ DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, I m_utilities(utils), m_copyQueue(copyQueue) { - imagesUsageCache = std::unique_ptr(new ImagesUsageCache(ImagesBindingArraySize)); + imagesCache = std::unique_ptr(new ImagesCache(ImagesBindingArraySize)); } // function is called when buffer is filled and we should submit draws and clear the buffers and continue filling @@ -63,7 +63,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, IDeviceMemoryAllocator::SAllocateInfo allocationInfo = { // TODO: Get from user side. 
- .size = 170 * 1024 * 1024, // 70 MB + .size = 270 * 1024 * 1024, // 70 MB .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, .memoryTypeIndex = memoryTypeIdx, .dedication = nullptr, @@ -360,24 +360,24 @@ void DrawResourcesFiller::drawFontGlyph( } } -uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureStaticImageAvailability(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. - auto evictCallback = [&](image_id imageID, const ImageReference& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - ImageReference* cachedImageReference = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - cachedImageReference->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN - // if cachedImageReference->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx - if (cachedImageReference->arrayIndex == InvalidTextureIndex) + if (cachedImageRecord->arrayIndex == 
InvalidTextureIndex) { // This is a new image (cache miss). Allocate a descriptor index for it. - cachedImageReference->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. - suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageReference->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - if (cachedImageReference->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { auto* device = m_utilities->getLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -399,23 +399,13 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma if (allocResults.isValid()) { - cachedImageReference->imageType = ImageType::STATIC; - cachedImageReference->gpuResident = false; - cachedImageReference->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN - cachedImageReference->allocationOffset = allocResults.allocationOffset; - cachedImageReference->allocationSize = allocResults.allocationSize; - cachedImageReference->gpuImageView = allocResults.gpuImageView; - - StaticImageState newState = - { - .cpuImage = cpuImage, - .gpuImageView = allocResults.gpuImageView, - .allocationOffset = allocResults.allocationOffset, - .allocationSize = allocResults.allocationSize, - .arrayIndex = 
cachedImageReference->arrayIndex, - .gpuResident = false, - }; - staticImagesState.emplace(imageID, newState); + cachedImageRecord->type = ImageType::STATIC; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = cpuImage; } else { @@ -424,48 +414,48 @@ uint32_t DrawResourcesFiller::addStaticImage2D(image_id imageID, const core::sma // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. _NBL_DEBUG_BREAK_IF(true); - if (cachedImageReference->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) { // We previously successfully create and allocated memory for the Image // but failed to bind and create image view // It's crucial to deallocate the offset+size form our images memory suballocator - imagesMemorySubAllocator->deallocate(cachedImageReference->allocationOffset, cachedImageReference->allocationSize); + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); } - if (cachedImageReference->arrayIndex != InvalidTextureIndex) + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) { // We previously allocated a descriptor index, but failed to create a usable GPU image. // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. 
- suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageReference->arrayIndex, {}); - cachedImageReference->arrayIndex = InvalidTextureIndex; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } } else { // TODO: log here, index allocation failed. - cachedImageReference->arrayIndex = InvalidTextureIndex; + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - assert(cachedImageReference->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed // cached or just inserted, we update the lastUsedFrameIndex - cachedImageReference->lastUsedFrameIndex = currentFrameIndex; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; - return cachedImageReference->arrayIndex; + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return cachedImageRecord->arrayIndex != InvalidTextureIndex; } -uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) { auto* device = m_utilities->getLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. 
- auto evictCallback = [&](image_id imageID, const ImageReference& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - ImageReference* cachedImageReference = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE IGPUImage::SCreationParams imageCreationParams = {}; @@ -485,30 +475,30 @@ uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_ imageCreationParams.format = physDev->promoteImageFormat(request,imageCreationParams.tiling); } - // if cachedImageReference->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // But we need to check if the cached image needs resizing/recreation. - if (cachedImageReference->arrayIndex != InvalidTextureIndex) + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) { // found in cache, but does it require resize? recreation? 
- if (cachedImageReference->gpuImageView) + if (cachedImageRecord->gpuImageView) { - auto imgViewParams = cachedImageReference->gpuImageView->getCreationParameters(); + auto imgViewParams = cachedImageRecord->gpuImageView->getCreationParameters(); if (imgViewParams.image) { const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); - const auto cachedImageType = cachedImageReference->imageType; + const auto cachedImageType = cachedImageRecord->type; // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus const auto currentParams = static_cast(imageCreationParams); const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; if (needsRecreation) { // call the eviction callbacl so the currently cached imageID gets eventually deallocated from memory arena. - evictCallback(imageID, *cachedImageReference); + evictCallback(imageID, *cachedImageRecord); // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image - *cachedImageReference = ImageReference(currentFrameIndex); + *cachedImageRecord = CachedImageRecord(currentFrameIndex); // imagesUsageCache->erase(imageID); - // cachedImageReference = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + // cachedImageRecord = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); } } else @@ -523,28 +513,27 @@ uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_ } // in which case we don't queue anything for upload, and return the idx - if (cachedImageReference->arrayIndex == InvalidTextureIndex) + if (cachedImageRecord->arrayIndex == InvalidTextureIndex) { // This is a new image (cache miss). Allocate a descriptor index for it. 
- cachedImageReference->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. - suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageReference->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - if (cachedImageReference->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { // Attempt to create a GPU image and image view for this texture. ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, intendedNextSubmit, std::to_string(imageID)); if (allocResults.isValid()) { - cachedImageReference->imageType = georeferenceImageType; - cachedImageReference->gpuResident = false; - cachedImageReference->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN - cachedImageReference->allocationOffset = allocResults.allocationOffset; - cachedImageReference->allocationSize = allocResults.allocationSize; - cachedImageReference->gpuImageView = allocResults.gpuImageView; - - // TODO: queue update of the set with the gpu image view. 
+ cachedImageRecord->type = georeferenceImageType; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = nullptr; } else { @@ -553,37 +542,37 @@ uint32_t DrawResourcesFiller::retrieveGeoreferencedImage_AllocateIfNeeded(image_ // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. _NBL_DEBUG_BREAK_IF(true); - if (cachedImageReference->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) { // We previously successfully create and allocated memory for the Image // but failed to bind and create image view // It's crucial to deallocate the offset+size form our images memory suballocator - imagesMemorySubAllocator->deallocate(cachedImageReference->allocationOffset, cachedImageReference->allocationSize); + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); } - if (cachedImageReference->arrayIndex != InvalidTextureIndex) + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) { // We previously allocated a descriptor index, but failed to create a usable GPU image. // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. 
- suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageReference->arrayIndex, {}); - cachedImageReference->arrayIndex = InvalidTextureIndex; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } } else { // TODO: log here, index allocation failed. - cachedImageReference->arrayIndex = InvalidTextureIndex; + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - assert(cachedImageReference->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed // cached or just inserted, we update the lastUsedFrameIndex - cachedImageReference->lastUsedFrameIndex = currentFrameIndex; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; - return cachedImageReference->arrayIndex; + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return (cachedImageRecord->arrayIndex != InvalidTextureIndex); } // TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo @@ -612,7 +601,7 @@ void DrawResourcesFiller::drawGridDTM( void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit) { - beginMainObject(MainObjectType::IMAGE); + beginMainObject(MainObjectType::STATIC_IMAGE); uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); @@ -632,12 +621,34 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } +void 
DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) +{ + beginMainObject(MainObjectType::STATIC_IMAGE); + + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + + GeoreferencedImageInfo info = {}; + info.topLeft = params.worldspaceOBB.topLeft; + info.dirU = params.worldspaceOBB.dirU; + info.aspectRatio = params.worldspaceOBB.aspectRatio; + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) + { + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); + assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single GeoreferencedImageInfo + } + + endMainObject(); +} + bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) { if (!intendedNextSubmit.valid()) { // It is a caching submit without command buffer, just for the purpose of accumulation of staging resources - // In that case we don't push any uploads (i.e. we don't record any stagedStaticImage commmand in active command buffer, because there is no active command buffer) + // In that case we don't push any uploads (i.e. 
we don't record any imageRecord commmand in active command buffer, because there is no active command buffer) return false; } @@ -648,36 +659,45 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfImagesState); - // Push Static Images Uploads from replay cache, only those who are not gpu resident + // Push Static Images Uploads from replay cache, all the work below is necessary to detect whether our image to replay is already in the cache in the exact form OR we need to create new image + bind memory and set array index auto* device = m_utilities->getLogicalDevice(); - std::vector staticImageCopies; - for (auto& [id, replayImageState] : currentReplayCache->staticImagesState) + bool replayCacheFullyCovered = true; + for (auto& [imageID, toReplayRecord] : *currentReplayCache->imagesCache) { - auto it = staticImagesState.find(id); + // TODO: remove temoprary const_cast workaround. + CachedImageRecord& toReplayImageRecord_nonConst = const_cast(toReplayRecord); + + if (toReplayRecord.type != ImageType::STATIC) // non-static images (Georeferenced) won't be replayed like this + continue; + + auto* cachedRecord = imagesCache->peek(imageID); bool alreadyResident = false; // compare with existing state, and check whether image id is already resident. 
- if (it != staticImagesState.end()) + if (cachedRecord != nullptr) { - const StaticImageState& existingState = it->second; - const bool allocationMatches = - existingState.allocationOffset == replayImageState.allocationOffset && - existingState.allocationSize == replayImageState.allocationSize; + cachedRecord->allocationOffset == toReplayRecord.allocationOffset && + cachedRecord->allocationSize == toReplayRecord.allocationSize; - const bool arrayIndexMatches = existingState.arrayIndex == replayImageState.arrayIndex; + const bool arrayIndexMatches = cachedRecord->arrayIndex == toReplayRecord.arrayIndex; - alreadyResident = allocationMatches && arrayIndexMatches && existingState.gpuResident; + alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state == ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; } - // if already resident, we don't need to do anything + // if already resident, just update the state to the cached state (to make sure it doesn't get issued for upload again) and move on. 
if (alreadyResident) + { + toReplayImageRecord_nonConst.state = cachedRecord->state; // update the toReplayImageRecords's state, to completely match the currently resident state continue; + } + + replayCacheFullyCovered = false; bool successCreateNewImage = false; // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads - auto existingGPUImageViewParams = replayImageState.gpuImageView->getCreationParameters(); + auto existingGPUImageViewParams = toReplayRecord.gpuImageView->getCreationParameters(); IGPUImage::SCreationParams imageParams = {}; imageParams = existingGPUImageViewParams.image->getCreationParameters(); @@ -687,13 +707,13 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = { .image = newGPUImage.get(), - .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + replayImageState.allocationOffset } + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } }; const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); if (boundToMemorySuccessfully) { - newGPUImage->setObjectDebugName((std::to_string(id) + " Static Image 2D").c_str()); + newGPUImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; viewParams.image = newGPUImage; @@ -701,14 +721,9 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit if (newGPUImageView) { successCreateNewImage = true; - - staticImageCopies.push_back(StaticImageCopy { - .cpuImage = replayImageState.cpuImage, - .gpuImageView = newGPUImageView, - .arrayIndex = replayImageState.arrayIndex - }); - - newGPUImageView->setObjectDebugName((std::to_string(id) + " Static 
Image View 2D").c_str()); + toReplayImageRecord_nonConst.gpuImageView = newGPUImageView; + toReplayImageRecord_nonConst.state = ImageState::CREATED_AND_MEMORY_BOUND; + newGPUImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); } } @@ -721,12 +736,15 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit success = false; } } - - bool replayStaticUploadSuccess = true; - if (staticImageCopies.size() > 0u) + // Our actual `imageCache` (which represents GPU state) didn't cover the replayCache fully, so new images had to be created, bound to memory. and they need to be written into their respective descriptor array indices again. + imagesCache->clear(); + for (auto it = currentReplayCache->imagesCache->crbegin(); it != currentReplayCache->imagesCache->crend(); it++) + imagesCache->base_t::insert(it->first, it->second); + + if (!replayCacheFullyCovered) { - // We need to block for previous submit in order to safely, rebind image's memory and update the descriptor set array index. + // We need to block for previous submit in order to safely update the descriptor set array index next. // // [FUTURE_CONSIDERATION]: To avoid stalling the CPU when replaying caches that overflow GPU memory, // we could recreate the image and image view, binding them to entirely new memory locations. @@ -734,38 +752,18 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit // Note: This isn't a problem if the replayed scene fits in memory and doesn't require overflow submissions due to image memory exhaustion. 
nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; device->blockForSemaphores({ &waitInfo, 1u }); - replayStaticUploadSuccess = pushStaticImagesUploads_Internal(intendedNextSubmit, staticImageCopies); - } - - if (replayStaticUploadSuccess) - { - staticImagesState = currentReplayCache->staticImagesState; - for (auto& [_, state] : staticImagesState) - state.gpuResident = true; } - success &= replayStaticUploadSuccess; + success &= bindImagesToArrayIndices(*imagesCache); + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); } else { flushDrawObjects(); success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); - - // Push Static Images Uploads, only those who are not gpu resident - std::vector staticImageCopies; - for (auto& [id, staticImageState] : staticImagesState) - { - if (!staticImageState.gpuResident) - staticImageCopies.push_back(StaticImageCopy{ .cpuImage = staticImageState.cpuImage, .gpuImageView = staticImageState.gpuImageView, .arrayIndex = staticImageState.arrayIndex }); - } - const bool staticImagesUploadSuccess = pushStaticImagesUploads_Internal(intendedNextSubmit, staticImageCopies); - if (staticImagesUploadSuccess) - { - for (auto& [id, staticImageState] : staticImagesState) - staticImageState.gpuResident = true; - } - success &= staticImagesUploadSuccess; + success &= bindImagesToArrayIndices(*imagesCache); + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); } return success; } @@ -860,7 +858,12 @@ std::unique_ptr DrawResourcesFiller::createRep stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. 
ret->drawCallsData = drawCalls; ret->activeMainObjectIndex = activeMainObjectIndex; - ret->staticImagesState = staticImagesState; // copy state of static images + ret->imagesCache = std::unique_ptr(new ImagesCache(imagesCache->size())); + // It should be copyable, here is a temporary hack: + for (auto it = imagesCache->crbegin(); it != imagesCache->crend(); it++) + { + ret->imagesCache->base_t::insert(it->first, it->second); + } return ret; } @@ -940,7 +943,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex auto msdfImage = msdfTextureArray->getCreationParameters().image; - // preparing msdfs for stagedStaticImage + // preparing msdfs for imageRecord using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t; image_barrier_t beforeTransferImageBarrier[] = { @@ -1056,51 +1059,79 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } } -bool DrawResourcesFiller::pushStaticImagesUploads_Internal(SIntendedSubmitInfo& intendedNextSubmit, std::span staticImagesCopy) +bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) +{ + bool success = true; + + auto* device = m_utilities->getLogicalDevice(); + auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet(); + + // DescriptorSet Updates + std::vector descriptorInfos; + std::vector descriptorWrites; + descriptorInfos.resize(imagesCache.size()); + descriptorWrites.resize(imagesCache.size()); + + uint32_t descriptorWriteCount = 0u; + for (auto& [id, record] : imagesCache) + { + if (record.state >= ImageState::BOUND_TO_DESCRIPTOR_SET || !record.gpuImageView) + continue; + + // Bind gpu image view to descriptor set + video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo = {}; + descriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo.desc = record.gpuImageView; + descriptorInfos[descriptorWriteCount] = descriptorInfo; + + // consider batching contiguous writes, 
if descriptor set updating was a hotspot + IGPUDescriptorSet::SWriteDescriptorSet descriptorWrite = {}; + descriptorWrite.dstSet = descriptorSet; + descriptorWrite.binding = imagesArrayBinding; + descriptorWrite.arrayElement = record.arrayIndex; + descriptorWrite.count = 1u; + descriptorWrite.info = &descriptorInfos[descriptorWriteCount]; + descriptorWrites[descriptorWriteCount] = descriptorWrite; + + const_cast(record).state = ImageState::BOUND_TO_DESCRIPTOR_SET; + descriptorWriteCount++; + } + + if (descriptorWriteCount > 0u) + success &= device->updateDescriptorSets(descriptorWriteCount, descriptorWrites.data(), 0u, nullptr); + return success; +} + +bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache) { bool success = true; - if (staticImagesCopy.size() > 0ull) + // Push Static Images Uploads, only those who are not gpu resident + // TODO: remove this vector and check state in each for loop below? + std::vector nonResidentImageRecords; + for (auto& [id, record] : imagesCache) + { + if (record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA) + nonResidentImageRecords.push_back(const_cast(&record)); // TODO: remove const_cast + } + + if (nonResidentImageRecords.size() > 0ull) { auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); - auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet(); auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); if (cmdBuffInfo) { IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; - // DescriptorSet Updates - std::vector descriptorInfos; - std::vector descriptorWrites; - descriptorInfos.resize(staticImagesCopy.size()); - descriptorWrites.resize(staticImagesCopy.size()); - for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) - { - auto& stagedStaticImage = staticImagesCopy[i]; - // Bind gpu image view to 
descriptor set - descriptorInfos[i].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfos[i].desc = stagedStaticImage.gpuImageView; - - // consider batching contiguous writes, if descriptor set updating was a hotspot - descriptorWrites[i].dstSet = descriptorSet; - descriptorWrites[i].binding = imagesArrayBinding; - descriptorWrites[i].arrayElement = stagedStaticImage.arrayIndex; - descriptorWrites[i].count = 1u; - descriptorWrites[i].info = &descriptorInfos[i]; - } - - success &= device->updateDescriptorSets(descriptorWrites.size(), descriptorWrites.data(), 0u, nullptr); - std::vector beforeCopyImageBarriers; - beforeCopyImageBarriers.resize(staticImagesCopy.size()); + beforeCopyImageBarriers.resize(nonResidentImageRecords.size()); - // Pipeline Barriers before stagedStaticImage - for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) + // Pipeline Barriers before imageRecord + for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i) { - auto& stagedStaticImage = staticImagesCopy[i]; - const auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; + auto& imageRecord = *nonResidentImageRecords[i]; + const auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; beforeCopyImageBarriers[i] = { .barrier = { @@ -1126,27 +1157,34 @@ bool DrawResourcesFiller::pushStaticImagesUploads_Internal(SIntendedSubmitInfo& } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) + for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i) { - auto& stagedStaticImage = staticImagesCopy[i]; - auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; + auto& imageRecord = *nonResidentImageRecords[i]; + auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; success &= m_utilities->updateImageViaStagingBuffer( intendedNextSubmit, - 
stagedStaticImage.cpuImage->getBuffer()->getPointer(), stagedStaticImage.cpuImage->getCreationParameters().format, + imageRecord.staticCPUImage->getBuffer()->getPointer(), imageRecord.staticCPUImage->getCreationParameters().format, gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - stagedStaticImage.cpuImage->getRegions()); + imageRecord.staticCPUImage->getRegions()); + + if (success) + imageRecord.state = ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; + else + { + // TODO: LOG + } } commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change std::vector afterCopyImageBarriers; - afterCopyImageBarriers.resize(staticImagesCopy.size()); + afterCopyImageBarriers.resize(nonResidentImageRecords.size()); - // Pipeline Barriers before stagedStaticImage - for (uint32_t i = 0u; i < staticImagesCopy.size(); ++i) + // Pipeline Barriers before imageRecord + for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i) { - auto& stagedStaticImage = staticImagesCopy[i]; - const auto& gpuImg = stagedStaticImage.gpuImageView->getCreationParameters().image; + auto& imageRecord = *nonResidentImageRecords[i]; + const auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; afterCopyImageBarriers[i] = { .barrier = { @@ -1760,7 +1798,44 @@ bool DrawResourcesFiller::addImageObject_Internal(const ImageObjectInfo& imageOb DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::STATIC_IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +bool 
DrawResourcesFiller::addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GeoreferencedImageInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GeoreferencedImageInfo), alignof(GeoreferencedImageInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &georeferencedImageInfo, sizeof(GeoreferencedImageInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::STREAMED_IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function drawObj.geometryAddress = geometryBufferOffset; drawObjectsToBeFilled[0u] = drawObj; @@ -1770,7 +1845,7 @@ bool DrawResourcesFiller::addImageObject_Internal(const ImageObjectInfo& imageOb uint32_t DrawResourcesFiller::getImageIndexFromID(image_id 
imageID, const SIntendedSubmitInfo& intendedNextSubmit) { uint32_t textureIdx = InvalidTextureIndex; - ImageReference* imageRef = imagesUsageCache->get(imageID); + CachedImageRecord* imageRef = imagesCache->get(imageID); if (imageRef) { textureIdx = imageRef->arrayIndex; @@ -1779,7 +1854,7 @@ uint32_t DrawResourcesFiller::getImageIndexFromID(image_id imageID, const SInten return textureIdx; } -void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const ImageReference& evicted, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit) { if (evicted.arrayIndex == InvalidTextureIndex) { @@ -1816,11 +1891,6 @@ void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const Imag ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); } - - // erase imageID from our state map - // kindof mirrors the state of the LRUCache for static images - if (evicted.imageType == ImageType::STATIC) - staticImagesState.erase(imageID); } DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName) @@ -1832,8 +1902,8 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllo // Attempt to create a GPU image and corresponding image view for this texture. // If creation or memory allocation fails (likely due to VRAM exhaustion), - // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-cachedImageReference image remains. 
- while (imagesUsageCache->size() > 0u) + // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-cachedImageRecord image remains. + while (imagesCache->size() > 0u) { // Try creating the image and allocating memory for it: nbl::video::IGPUImage::SCreationParams params = {}; @@ -1917,7 +1987,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllo } // Getting here means we failed creating or allocating the image, evict and retry. - if (imagesUsageCache->size() == 1u) + if (imagesCache->size() == 1u) { // Nothing else to evict; give up. // We probably have evicted almost every other texture except the one we just allocated an index for @@ -1925,13 +1995,13 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllo break; } - assert(imagesUsageCache->size() > 1u); + assert(imagesCache->size() > 1u); - const image_id evictionCandidate = imagesUsageCache->select_eviction_candidate(); - ImageReference* imageRef = imagesUsageCache->peek(evictionCandidate); + const image_id evictionCandidate = imagesCache->select_eviction_candidate(); + CachedImageRecord* imageRef = imagesCache->peek(evictionCandidate); if (imageRef) evictImage_SubmitIfNeeded(evictionCandidate, *imageRef, intendedNextSubmit); - imagesUsageCache->erase(evictionCandidate); + imagesCache->erase(evictionCandidate); while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. 
// we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference @@ -2062,7 +2132,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN - // if cachedImageReference->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx + // if cachedImageRecord->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx if (inserted->alloc_idx == InvalidTextureIndex) { // New insertion == cache miss happened and insertion was successfull diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index ae071654a..4faa3fecc 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -221,7 +221,7 @@ struct DrawResourcesFiller * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. * - * If the image is already cached and its slot is valid, it returns the slot index directly. + * If the image is already cached and its slot is valid, it returns true; * Otherwise, it performs the following: * - Allocates a new descriptor set slot. * - Promotes the image format to be GPU-compatible. @@ -233,23 +233,43 @@ struct DrawResourcesFiller * @param cpuImage The CPU-side image resource to (possibly) upload. * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. * - * @return The index (slot) into the descriptor set array where the image is or will be bound. 
- * Returns `InvalidTextureIndex` only if all fallback and eviction attempts failed. - * * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers * a flush of pending draws to preserve correctness. * * @note The function uses the `imagesUsageCache` LRU cache to track usage and validity of texture slots. * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. + * @return true if the image was successfully cached and is ready for use; false if allocation failed. */ - uint32_t addStaticImage2D(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureStaticImageAvailability(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); - uint32_t retrieveGeoreferencedImage_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + /** + * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. + * + * If the specified image ID is not already present in the cache, or if the cached version is incompatible + * with the requested parameters (e.g. extent, format, or type), this function allocates GPU memory, + * creates the image and its view, to be bound to a descriptor binding in the future. + * + * If the image already exists and matches the requested parameters, its usage metadata is updated. + * In either case, the cache is updated to reflect usage in the current frame. + * + * This function also handles automatic eviction of old images via an LRU policy when space is limited. + * + * @param imageID Unique identifier of the image to add or reuse. 
+ * @param params Georeferenced Image Params + * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. + * + * @return true if the image was successfully cached and is ready for use; false if allocation failed. + * [TODO]: should be internal protected member function. + */ + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `addStaticImage` for the same imageID. void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); + // This function must be called immediately after `addStaticImage` for the same imageID. + void addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); @@ -357,7 +377,7 @@ struct DrawResourcesFiller std::vector drawCallsData; ResourcesCollection resourcesCollection; std::vector msdfImagesState; - std::unordered_map staticImagesState; + std::unique_ptr imagesCache; uint32_t activeMainObjectIndex = InvalidMainObjectIdx; // TODO: non msdf general CPU Images // TODO: Get total memory consumption for logging? @@ -394,8 +414,11 @@ struct DrawResourcesFiller /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& msdfImagesState); - /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. 
- bool pushStaticImagesUploads_Internal(SIntendedSubmitInfo& intendedNextSubmit, std::span staticImagesCopy); + /// @brief binds cached images into their correct descriptor set slot if not already resident. + bool bindImagesToArrayIndices(ImagesCache& imagesCache); + + /// @brief Records GPU copy commands for all staged images into the active command buffer, and binds them into correct descriptor set slot. + bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache); const size_t calculateRemainingResourcesSize() const; @@ -462,9 +485,12 @@ struct DrawResourcesFiller /// Attempts to upload a single GridDTMInfo considering resource limitations bool addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx); - /// Attempts to upload a single image object considering resource limitations (not accounting for the resource image added using addStaticImage2D function) + /// Attempts to upload a single image object considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) bool addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; + /// Attempts to upload a georeferenced image info considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) + bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx);; + uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); /** @@ -483,7 +509,7 @@ struct DrawResourcesFiller * * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. 
[future todo: fix] */ - void evictImage_SubmitIfNeeded(image_id imageID, const ImageReference& evicted, SIntendedSubmitInfo& intendedNextSubmit); + void evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit); struct ImageAllocateResults { @@ -707,12 +733,8 @@ struct DrawResourcesFiller bool m_hasInitializedMSDFTextureArrays = false; // Images: - std::unique_ptr imagesUsageCache; + std::unique_ptr imagesCache; smart_refctd_ptr suballocatedDescriptorSet; uint32_t imagesArrayBinding = 0u; - - // TODO: consider removing this and just using the `imagesUsageCache` and `ImageReference` when `core::ResizableLRUCache` is copyable and iterable - // Current state of the static images, used in `pushStaticImagesUploads` to make StaticImages `gpuResident` and bind them to correct array index - std::unordered_map staticImagesState; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index fe3e8bde9..d525a68f6 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -6,19 +6,29 @@ using namespace nbl::asset; using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. 
+enum class ImageState : uint8_t +{ + INVALID = 0, + CREATED_AND_MEMORY_BOUND, // GPU image created, not bound to descriptor set yet + BOUND_TO_DESCRIPTOR_SET, // Bound to descriptor set, GPU resident, but may contain uninitialized or partial data + GPU_RESIDENT_WITH_VALID_STATIC_DATA, // When data for static images gets issued for upload successfully +}; + enum class ImageType : uint8_t { INVALID = 0, - STATIC, // Regular non-georeferenced image, fully loaded once - GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view - GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant + STATIC, // Regular non-georeferenced image, fully loaded once + GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view + GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant }; struct GeoreferencedImageParams { - uint32_t2 imageExtents; - uint32_t2 viewportExtents; - asset::E_FORMAT format; + OrientedBoundingBox2D worldspaceOBB = {}; + uint32_t2 imageExtents = {}; + uint32_t2 viewportExtents = {}; + asset::E_FORMAT format = {}; + // TODO: Need to add other stuff later. }; /** @@ -99,106 +109,89 @@ struct ImageCleanup : public core::IReferenceCounted }; -struct StaticImageCopy -{ - core::smart_refctd_ptr cpuImage; - core::smart_refctd_ptr gpuImageView; - uint32_t arrayIndex; -}; - -// TODO: consider just using the ImagesUsageCache to store this StaticImagesState, i.e. 
merge this struct with the ImageReference -// it will be possible after LRUCache improvements and copyability -// for now this will be a mirror of the LRUCache but in an unordered_map -struct StaticImageState -{ - core::smart_refctd_ptr cpuImage = nullptr; - core::smart_refctd_ptr gpuImageView = nullptr; - uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; - uint64_t allocationSize = 0u; - uint32_t arrayIndex = ~0u; // in texture array descriptor - bool gpuResident = false; -}; - - -struct ImageReference +struct CachedImageRecord { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; uint32_t arrayIndex = InvalidTextureIndex; // index in our array of textures binding - ImageType imageType = ImageType::INVALID; - bool gpuResident = false; + ImageType type = ImageType::INVALID; + ImageState state = ImageState::INVALID; uint64_t lastUsedFrameIndex = 0ull; // last used semaphore value on this image uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; uint64_t allocationSize = 0ull; core::smart_refctd_ptr gpuImageView = nullptr; + core::smart_refctd_ptr staticCPUImage = nullptr; // cached cpu image for uploading to gpuImageView when needed. 
// In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value - ImageReference(uint64_t currentFrameIndex) + CachedImageRecord(uint64_t currentFrameIndex) : arrayIndex(InvalidTextureIndex) - , imageType(ImageType::INVALID) - , gpuResident(false) + , type(ImageType::INVALID) + , state(ImageState::INVALID) , lastUsedFrameIndex(currentFrameIndex) , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) , allocationSize(0ull) , gpuImageView(nullptr) + , staticCPUImage(nullptr) {} - ImageReference() - : ImageReference(0ull) + CachedImageRecord() + : CachedImageRecord(0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value without changing `index` - inline ImageReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } + inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; // A resource-aware image cache with an LRU eviction policy. -// This cache tracks image usage by ID and provides hooks for eviction logic, such as releasing descriptor slots and deallocating GPU memory. +// This cache tracks image usage by ID and provides hooks for eviction logic (such as releasing descriptor slots and deallocating GPU memory done by user of this class) // Currently, eviction is purely LRU-based. In the future, eviction decisions may incorporate additional factors: // - memory usage per image. // - lastUsedFrameIndex. -// This class does not own GPU resources directly, but helps coordinate their lifetimes in sync with GPU usage via eviction callbacks. -class ImagesUsageCache +// This class helps coordinate images' lifetimes in sync with GPU usage via eviction callbacks. 
+class ImagesCache : public core::ResizableLRUCache { public: - ImagesUsageCache(size_t capacity) - : lruCache(ImagesLRUCache(capacity)) + using base_t = core::ResizableLRUCache; + + ImagesCache(size_t capacity) + : base_t(capacity) {} // Attempts to insert a new image into the cache. // If the cache is full, invokes the provided `evictCallback` to evict an image. // Returns a pointer to the inserted or existing ImageReference. - template EvictionCallback> - inline ImageReference* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) + template EvictionCallback> + inline CachedImageRecord* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) { - auto lruEvictionCallback = [&](const ImageReference& evicted) + auto lruEvictionCallback = [&](const CachedImageRecord& evicted) { - const image_id* evictingKey = lruCache.get_least_recently_used(); + const image_id* evictingKey = base_t::get_least_recently_used(); assert(evictingKey != nullptr); if (evictingKey) evictCallback(*evictingKey, evicted); }; - return lruCache.insert(imageID, lastUsedSema, lruEvictionCallback); + return base_t::insert(imageID, lastUsedSema, lruEvictionCallback); } // Retrieves the image associated with `imageID`, updating its LRU position. - inline ImageReference* get(image_id imageID) + inline CachedImageRecord* get(image_id imageID) { - return lruCache.get(imageID); + return base_t::get(imageID); } // Retrieves the ImageReference without updating LRU order. - inline ImageReference* peek(image_id imageID) + inline CachedImageRecord* peek(image_id imageID) { - return lruCache.peek(imageID); + return base_t::peek(imageID); } - inline size_t size() const { return lruCache.size(); } + inline size_t size() const { return base_t::size(); } // Selects an eviction candidate based on LRU policy. // In the future, this could factor in memory pressure or semaphore sync requirements. 
inline image_id select_eviction_candidate() { - const image_id* lru = lruCache.get_least_recently_used(); + const image_id* lru = base_t::get_least_recently_used(); if (lru) return *lru; else @@ -212,10 +205,6 @@ class ImagesUsageCache // Removes a specific image from the cache (manual eviction). inline void erase(image_id imageID) { - lruCache.erase(imageID); + base_t::erase(imageID); } - -private: - using ImagesLRUCache = core::ResizableLRUCache; - ImagesLRUCache lruCache; // TODO: for now, work with simple lru cache, later on consider resource usage along with lastUsedSema value }; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 1394bf719..c59669fa6 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -45,7 +45,7 @@ static constexpr bool DebugModeWireframe = false; static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; -static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic +static constexpr bool CacheAndReplay = true; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic enum class ExampleMode { @@ -2906,10 +2906,17 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from //printf(std::format("\n Image {} \n", i).c_str()); - drawResourcesFiller.addStaticImage2D(imageID, sampleImages[i], intendedNextSubmit); + drawResourcesFiller.ensureStaticImageAvailability(imageID, sampleImages[i], intendedNextSubmit); drawResourcesFiller.addImageObject(imageID, { .topLeft = { 0.0 + (i) * 3.0, 0.0 }, .dirU = { 3.0 , 0.0 }, .aspectRatio = 1.0 }, intendedNextSubmit); //printf("\n"); } + + GeoreferencedImageParams geoRefParams = {}; + 
geoRefParams.format = asset::EF_R8G8B8A8_SRGB; + geoRefParams.imageExtents = uint32_t2 (2048, 2048); + geoRefParams.viewportExtents = (m_realFrameIx <= 5u) ? uint32_t2(1280, 720) : uint32_t2(3840, 2160); // to test trigerring resize/recreation + // drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(6996, geoRefParams, intendedNextSubmit); + LineStyleInfo lineStyle = { .color = float32_t4(1.0f, 0.1f, 0.1f, 0.9f), diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 0280b5881..72ab980c4 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -118,9 +118,10 @@ enum class MainObjectType : uint32_t POLYLINE, HATCH, TEXT, - IMAGE, + STATIC_IMAGE, DTM, - GRID_DTM + GRID_DTM, + STREAMED_IMAGE, }; enum class ObjectType : uint32_t @@ -130,9 +131,10 @@ enum class ObjectType : uint32_t CURVE_BOX = 2u, POLYLINE_CONNECTOR = 3u, FONT_GLYPH = 4u, - IMAGE = 5u, + STATIC_IMAGE = 5u, TRIANGLE_MESH = 6u, - GRID_DTM = 7u + GRID_DTM = 7u, + STREAMED_IMAGE = 8u, }; enum class MajorAxis : uint32_t @@ -240,6 +242,16 @@ struct ImageObjectInfo uint32_t textureID; // 4 bytes (32) }; +// Goes into geometry buffer, needs to be aligned by 8 +// Currently a simple OBB like ImageObject, but later will be fullscreen with additional info about UV offset for toroidal(mirror) addressing +struct GeoreferencedImageInfo +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + float32_t2 dirU; // 2 * 4 = 8 bytes (24) + float32_t aspectRatio; // 4 bytes (28) + uint32_t textureID; // 4 bytes (32) +}; + // Goes into geometry buffer, needs to be aligned by 8 struct GridDTMInfo { diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 766225acd..69fab0a4c 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -375,7 +375,7 @@ float4 fragMain(PSInput input) : SV_TARGET localAlpha = 1.0f - 
smoothstep(-globals.antiAliasingFactor / 2.0f + bolden, globals.antiAliasingFactor / 2.0f + bolden, msdf); } } - else if (objType == ObjectType::IMAGE) + else if (objType == ObjectType::STATIC_IMAGE) { const float2 uv = input.getImageUV(); const uint32_t textureId = input.getImageTextureId(); @@ -413,14 +413,25 @@ float4 fragMain(PSInput input) : SV_TARGET printf("uv = %f, %f", uv.x, uv.y); } - + else if (objType == ObjectType::STREAMED_IMAGE) + { + const float2 uv = input.getImageUV(); + const uint32_t textureId = input.getImageTextureId(); + + if (textureId != InvalidTextureIndex) + { + float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); + textureColor = colorSample.rgb; + localAlpha = colorSample.a; + } + } uint2 fragCoord = uint2(input.position.xy); if (localAlpha <= 0) discard; - const bool colorFromTexture = objType == ObjectType::IMAGE || objType == ObjectType::GRID_DTM; + const bool colorFromTexture = objType == ObjectType::STREAMED_IMAGE || objType == ObjectType::STATIC_IMAGE || objType == ObjectType::GRID_DTM; return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index e92a8d33b..b225b64a4 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -624,7 +624,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.setFontGlyphUV(uv); outV.setFontGlyphTextureId(textureID); } - else if (objType == ObjectType::IMAGE) + else if (objType == ObjectType::STATIC_IMAGE) { pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); @@ -670,6 +670,27 @@ PSInput main(uint vertexID : SV_VertexID) 
outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); outV.setImageUV(corner); } + else if (objType == ObjectType::STREAMED_IMAGE) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; + const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); + const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); + const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + float2 uv = corner; // non-dilated + + float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + + outV.position = float4(ndcCorner, 0.f, 1.f); + outV.setImageUV(uv); + outV.setImageTextureId(textureID); + } // Make the cage fullscreen for testing: #if 0 From 42ab873ba3d698da396368cb2f8b545107ce8f77 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 22 May 2025 07:53:43 +0400 Subject: [PATCH 092/129] streamed image copy --- 62_CAD/DrawResourcesFiller.cpp | 125 ++++++++++++++++++++++++++++++++- 62_CAD/DrawResourcesFiller.h | 11 ++- 62_CAD/Images.h | 6 ++ 3 files changed, 137 insertions(+), 5 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 425834a99..4b1172847 100644 --- 
a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -497,8 +497,8 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image *cachedImageRecord = CachedImageRecord(currentFrameIndex); - // imagesUsageCache->erase(imageID); - // cachedImageRecord = imagesUsageCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + // imagesCache->erase(imageID); + // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); } } else @@ -756,6 +756,7 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit success &= bindImagesToArrayIndices(*imagesCache); success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + // Streamed uploads in cache&replay?! } else { @@ -764,6 +765,7 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); success &= bindImagesToArrayIndices(*imagesCache); success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + success &= pushStreamedImagesUploads(intendedNextSubmit); } return success; } @@ -1225,6 +1227,125 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN return success; } +bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + bool success = true; + + if (streamedImageCopies.size() > 0ull) + { + auto* device = m_utilities->getLogicalDevice(); + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); + + if (cmdBuffInfo) + { + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers 
before imageCopy + for (auto& [imageID, imageCopy] : streamedImageCopies) + { + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + beforeCopyImageBarriers.push_back( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + }); + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); + + for (auto& [imageID, imageCopy] : streamedImageCopies) + { + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + success &= m_utilities->updateImageViaStagingBuffer( + intendedNextSubmit, + imageCopy.srcBuffer->getPointer(), gpuImg->getCreationParameters().format, + gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + { &imageCopy.region, 1u }); + } + + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + + std::vector afterCopyImageBarriers; + afterCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers before imageCopy + for (auto& [imageID, imageCopy] : streamedImageCopies) + { + auto* imageRecord = 
imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + afterCopyImageBarriers.push_back ( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }); + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + } + else + { + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + if (!success) + { + // TODO: Log + _NBL_DEBUG_BREAK_IF(true); + } + return success; +} + const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const { assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption()); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 4faa3fecc..6ece66de3 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -237,7 +237,7 @@ struct DrawResourcesFiller * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers * a flush of pending draws to preserve correctness. * - * @note The function uses the `imagesUsageCache` LRU cache to track usage and validity of texture slots. + * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. 
* If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. * @return true if the image was successfully cached and is ready for use; false if allocation failed. */ @@ -417,8 +417,11 @@ struct DrawResourcesFiller /// @brief binds cached images into their correct descriptor set slot if not already resident. bool bindImagesToArrayIndices(ImagesCache& imagesCache); - /// @brief Records GPU copy commands for all staged images into the active command buffer, and binds them into correct descriptor set slot. + /// @brief Records GPU copy commands for all staged images into the active command buffer. bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache); + + /// @brief copies the queued up streamed copies. + bool pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit); const size_t calculateRemainingResourcesSize() const; @@ -550,7 +553,7 @@ struct DrawResourcesFiller * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). * @param[out] outImageType Indicates whether the image should be fully resident or streamed. * @param[in] georeferencedImageParams Parameters describing the full image extents, viewport extents, and format. 
- */ + */ void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams); void resetMainObjects() @@ -736,5 +739,7 @@ struct DrawResourcesFiller std::unique_ptr imagesCache; smart_refctd_ptr suballocatedDescriptorSet; uint32_t imagesArrayBinding = 0u; + + std::unordered_map streamedImageCopies; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index d525a68f6..e43c72fd2 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -208,3 +208,9 @@ class ImagesCache : public core::ResizableLRUCache base_t::erase(imageID); } }; + +struct StreamedImageCopy +{ + core::smart_refctd_ptr srcBuffer; // Make it 'std::future' later? + asset::IImage::SBufferCopy region; +}; From b3dd4099d394f76d867bc60e068dcb5280fa7f23 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 22 May 2025 10:07:24 +0400 Subject: [PATCH 093/129] static images improvements, promoting from the get-go --- 62_CAD/main.cpp | 204 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 179 insertions(+), 25 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index c59669fa6..269b037be 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -277,6 +277,87 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources std::array,ISwapchain::MaxImages> m_framebuffers; }; + +// TODO: Move this funcitons that help with creating a new promoted CPUImage +template +struct PromotionComponentSwizzle +{ + template + void operator()(const InT* in, OutT* out) const + { + using in_t = std::conditional_t, uint64_t, InT>; + using out_t = std::conditional_t, uint64_t, OutT>; + + reinterpret_cast(out)[0u] = reinterpret_cast(in)[0u]; + + if constexpr (SRC_CHANNELS > 1) + reinterpret_cast(out)[1u] = reinterpret_cast(in)[1u]; + else + reinterpret_cast(out)[1u] = static_cast(0); + + if constexpr (SRC_CHANNELS > 2) + reinterpret_cast(out)[2u] = reinterpret_cast(in)[2u]; + else + 
reinterpret_cast(out)[2u] = static_cast(0); + + if constexpr (SRC_CHANNELS > 3) + reinterpret_cast(out)[3u] = reinterpret_cast(in)[3u]; + else + reinterpret_cast(out)[3u] = static_cast(1); + } +}; +template +bool performCopyUsingImageFilter( + const core::smart_refctd_ptr& inCPUImage, + const core::smart_refctd_ptr& outCPUImage) +{ + Filter filter; + + const uint32_t mipLevels = inCPUImage->getCreationParameters().mipLevels; + + for (uint32_t level = 0u; level < mipLevels; ++level) + { + const auto regions = inCPUImage->getRegions(level); + + for (auto& region : regions) + { + typename Filter::state_type state = {}; + state.extent = region.imageExtent; + state.layerCount = region.imageSubresource.layerCount; + state.inImage = inCPUImage.get(); + state.outImage = outCPUImage.get(); + state.inOffsetBaseLayer = core::vectorSIMDu32(region.imageOffset.x, region.imageOffset.y, region.imageOffset.z, region.imageSubresource.baseArrayLayer); + state.outOffsetBaseLayer = core::vectorSIMDu32(0u); + state.inMipLevel = region.imageSubresource.mipLevel; + state.outMipLevel = region.imageSubresource.mipLevel; + + if (!filter.execute(core::execution::par_unseq, &state)) + return false; + } + } + return true; +} + +bool performImageFormatPromotionCopy(const core::smart_refctd_ptr& inCPUImage, const core::smart_refctd_ptr& outCPUImage) +{ + asset::E_FORMAT srcImageFormat = inCPUImage->getCreationParameters().format; + asset::E_FORMAT dstImageFormat = outCPUImage->getCreationParameters().format; + + // In = srcData, Out = stagingBuffer + if (srcImageFormat == dstImageFormat) + return false; + + auto srcChannelCount = asset::getFormatChannelCount(srcImageFormat); + if (srcChannelCount == 1u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else if (srcChannelCount == 2u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else if (srcChannelCount == 3u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else + return 
performCopyUsingImageFilter>>(inCPUImage, outCPUImage); +} + class ComputerAidedDesign final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication { using device_base_t = examples::SimpleWindowedApplication; @@ -388,22 +469,44 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - IGPUSampler::SParams samplerParams = {}; - samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_OPAQUE_WHITE; // positive means outside shape - samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; - samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; - samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; - samplerParams.AnisotropicFilter = 3; - samplerParams.CompareEnable = false; - samplerParams.CompareFunc = ECO_GREATER; - samplerParams.LodBias = 0.f; - samplerParams.MinLod = -1000.f; - samplerParams.MaxLod = 1000.f; - msdfTextureSampler = m_device->createSampler(samplerParams); - + // MSDF Image Sampler + { + IGPUSampler::SParams samplerParams = {}; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_OPAQUE_WHITE; // positive means outside shape + samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; + samplerParams.AnisotropicFilter = 3; + samplerParams.CompareEnable = false; + samplerParams.CompareFunc = ECO_GREATER; + samplerParams.LodBias = 0.f; + samplerParams.MinLod = -1000.f; + 
samplerParams.MaxLod = 1000.f; + msdfImageSampler = m_device->createSampler(samplerParams); + } + + // Static Image Sampler + { + IGPUSampler::SParams samplerParams = {}; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_TRANSPARENT_BLACK; + samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; + samplerParams.AnisotropicFilter = 3; + samplerParams.CompareEnable = false; + samplerParams.CompareFunc = ECO_GREATER; + samplerParams.LodBias = 0.f; + samplerParams.MinLod = -1000.f; + samplerParams.MaxLod = 1000.f; + staticImageSampler = m_device->createSampler(samplerParams); + } + // Initial Pipeline Transitions and Clearing of PseudoStencil and ColorStorage // Recorded to Temporary CommandBuffer, Submitted to Graphics Queue, and Blocked on here { @@ -746,10 +849,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu descriptorInfosSet0[0u].desc = m_globalsBuffer; descriptorInfosSet0[1u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfosSet0[1u].info.combinedImageSampler.sampler = msdfTextureSampler; + descriptorInfosSet0[1u].info.combinedImageSampler.sampler = msdfImageSampler; descriptorInfosSet0[1u].desc = drawResourcesFiller.getMSDFsTextureArray(); - descriptorInfosSet0[2u].desc = msdfTextureSampler; // TODO[Erfan]: different sampler and make immutable? + descriptorInfosSet0[2u].desc = staticImageSampler; // TODO[Erfan]: different sampler and make immutable? // This is bindless to we write to it later. 
// descriptorInfosSet0[3u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; @@ -1094,8 +1197,58 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); } - const auto cpuImage = cpuImgView->getCreationParameters().image; - sampleImages.push_back(cpuImage); + + const auto loadedCPUImage = cpuImgView->getCreationParameters().image; + const auto loadedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + // Promoting the image to a format GPU supports. (so that updateImageViaStagingBuffer doesn't have to handle that each frame if overflow-submit needs to happen) + auto promotedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + promotedCPUImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = promotedCPUImageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(promotedCPUImageCreationParams.usage) + }; + promotedCPUImageCreationParams.format = m_physicalDevice->promoteImageFormat(request,video::IGPUImage::TILING::OPTIMAL); + } + + if (loadedCPUImageCreationParams.format != promotedCPUImageCreationParams.format) + { + smart_refctd_ptr promotedCPUImage = ICPUImage::create(promotedCPUImageCreationParams); + core::rational bytesPerPixel = asset::getBytesPerPixel(promotedCPUImageCreationParams.format); + + const auto extent = loadedCPUImageCreationParams.extent; + const uint32_t mipLevels = loadedCPUImageCreationParams.mipLevels; + const uint32_t arrayLayers = loadedCPUImageCreationParams.arrayLayers; + + // Only supporting 1 mip, it's just for test.. 
+ const size_t byteSize = (bytesPerPixel * extent.width * extent.height * extent.depth * arrayLayers).getIntegerApprox(); // TODO: consider mips + ICPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = byteSize; + smart_refctd_ptr promotedCPUImageBuffer = ICPUBuffer::create(std::move(bufferCreationParams)); + + auto newRegions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = newRegions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; // TODO + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = arrayLayers; + region.bufferOffset = 0u; + region.bufferRowLength = 0u; + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = extent; + promotedCPUImage->setBufferAndRegions(std::move(promotedCPUImageBuffer), newRegions); + + performImageFormatPromotionCopy(loadedCPUImage, promotedCPUImage); + sampleImages.push_back(promotedCPUImage); + } + else + { + sampleImages.push_back(loadedCPUImage); + } } return true; @@ -2928,11 +3081,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { std::vector linePoints; linePoints.push_back({ 0.0, 0.0 }); - linePoints.push_back({ 100.0, 0.0 }); - linePoints.push_back({ 100.0, -100.0 }); + linePoints.push_back({ 1.0, 0.0 }); + linePoints.push_back({ 1.0, -1.0 }); polyline.addLinePoints(linePoints); } - // drawResourcesFiller.drawPolyline(polyline, lineStyle, intendedNextSubmit); + drawResourcesFiller.drawPolyline(polyline, lineStyle, intendedNextSubmit); } else if (mode == ExampleMode::CASE_8) { @@ -2985,7 +3138,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, m_font.get(), float64_t2(0.0,-200.0), float32_t2(1.0, 1.0), rotation, float32_t4(1.0, 1.0, 1.0, 1.0), italicTiltAngle, 0.0f); 
singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, m_font.get(), float64_t2(0.0,-250.0), float32_t2(1.0, 1.0), rotation, float32_t4(1.0, 1.0, 1.0, 1.0), italicTiltAngle, 0.5f); // singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-200.0), float32_t2(1.0, 1.0), nbl::core::PI() * abs(cos(m_timeElapsed * 0.00005))); - // Smaller text to test mip maps + // Smaller text to test level maps //singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-130.0), float32_t2(0.4, 0.4), rotation); //singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-150.0), float32_t2(0.2, 0.2), rotation); } @@ -3482,7 +3635,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo; // pointer can change, value cannot - smart_refctd_ptr msdfTextureSampler; + smart_refctd_ptr msdfImageSampler; + smart_refctd_ptr staticImageSampler; smart_refctd_ptr m_globalsBuffer; smart_refctd_ptr descriptorSet0; From 65fe2ab855133edd71a89c4d2cedcf07596c49f8 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 22 May 2025 10:12:17 +0400 Subject: [PATCH 094/129] work on internal `streamedImageCopies` --- 62_CAD/DrawResourcesFiller.cpp | 29 ++++++++++++++++++++--------- 62_CAD/DrawResourcesFiller.h | 5 ++++- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 4b1172847..2449e8b05 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -63,7 +63,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, IDeviceMemoryAllocator::SAllocateInfo allocationInfo = { // TODO: Get from user side. 
- .size = 270 * 1024 * 1024, // 70 MB + .size = 65 * 1024 * 1024, // 70 MB .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, .memoryTypeIndex = memoryTypeIdx, .dedication = nullptr, @@ -575,6 +575,12 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( return (cachedImageRecord->arrayIndex != InvalidTextureIndex); } +bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy) +{ + auto& vec = streamedImageCopies[imageID]; + vec.emplace_back(imageCopy); +} + // TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo // We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) void DrawResourcesFiller::drawGridDTM( @@ -1244,7 +1250,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende beforeCopyImageBarriers.reserve(streamedImageCopies.size()); // Pipeline Barriers before imageCopy - for (auto& [imageID, imageCopy] : streamedImageCopies) + for (auto& [imageID, imageCopies] : streamedImageCopies) { auto* imageRecord = imagesCache->peek(imageID); if (imageRecord == nullptr) @@ -1277,7 +1283,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - for (auto& [imageID, imageCopy] : streamedImageCopies) + for (auto& [imageID, imageCopies] : streamedImageCopies) { auto* imageRecord = imagesCache->peek(imageID); if (imageRecord == nullptr) @@ -1285,11 +1291,14 @@ bool 
DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; - success &= m_utilities->updateImageViaStagingBuffer( - intendedNextSubmit, - imageCopy.srcBuffer->getPointer(), gpuImg->getCreationParameters().format, - gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - { &imageCopy.region, 1u }); + for (auto& imageCopy : imageCopies) + { + success &= m_utilities->updateImageViaStagingBuffer( + intendedNextSubmit, + imageCopy.srcBuffer->getPointer(), gpuImg->getCreationParameters().format, + gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + { &imageCopy.region, 1u }); + } } commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change @@ -1298,7 +1307,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende afterCopyImageBarriers.reserve(streamedImageCopies.size()); // Pipeline Barriers before imageCopy - for (auto& [imageID, imageCopy] : streamedImageCopies) + for (auto& [imageID, imageCopies] : streamedImageCopies) { auto* imageRecord = imagesCache->peek(imageID); if (imageRecord == nullptr) @@ -1330,6 +1339,8 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende }); } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + + streamedImageCopies.clear(); } else { diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 6ece66de3..520f984a7 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -264,6 +264,9 @@ struct DrawResourcesFiller */ bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + // [TODO]: should be internal protected member function. 
+ bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); + // This function must be called immediately after `addStaticImage` for the same imageID. void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); @@ -740,6 +743,6 @@ struct DrawResourcesFiller smart_refctd_ptr suballocatedDescriptorSet; uint32_t imagesArrayBinding = 0u; - std::unordered_map streamedImageCopies; + std::unordered_map> streamedImageCopies; }; From 9d0d57d700178f6ce2786380f9b14b88bb373bc2 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 23 May 2025 12:30:43 +0400 Subject: [PATCH 095/129] small fixes --- 62_CAD/DrawResourcesFiller.cpp | 5 +++-- 62_CAD/Images.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 2449e8b05..da0678c9b 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -579,6 +579,7 @@ bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, { auto& vec = streamedImageCopies[imageID]; vec.emplace_back(imageCopy); + return true; } // TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo @@ -629,7 +630,7 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) { - beginMainObject(MainObjectType::STATIC_IMAGE); + beginMainObject(MainObjectType::STREAMED_IMAGE); uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); @@ -1295,7 +1296,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende { success &= m_utilities->updateImageViaStagingBuffer( intendedNextSubmit, - 
imageCopy.srcBuffer->getPointer(), gpuImg->getCreationParameters().format, + imageCopy.srcBuffer->getPointer(), imageCopy.srcFormat, gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &imageCopy.region, 1u }); } diff --git a/62_CAD/Images.h b/62_CAD/Images.h index e43c72fd2..73be7ed50 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -211,6 +211,7 @@ class ImagesCache : public core::ResizableLRUCache struct StreamedImageCopy { + asset::E_FORMAT srcFormat; core::smart_refctd_ptr srcBuffer; // Make it 'std::future' later? asset::IImage::SBufferCopy region; }; From 723506eed5c6e80d8197a345ec4af46eb8829e62 Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 23 May 2025 13:48:18 +0200 Subject: [PATCH 096/129] Implemented grid DTM rendering --- 62_CAD/DrawResourcesFiller.cpp | 3 + 62_CAD/DrawResourcesFiller.h | 1 + 62_CAD/main.cpp | 4 +- 62_CAD/shaders/globals.hlsl | 6 +- 62_CAD/shaders/main_pipeline/common.hlsl | 14 +- 62_CAD/shaders/main_pipeline/dtm.hlsl | 3 +- .../main_pipeline/fragment_shader.hlsl | 190 ++++++++++++++++-- .../shaders/main_pipeline/vertex_shader.hlsl | 14 +- 8 files changed, 200 insertions(+), 35 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 105374493..86633a2be 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -587,6 +587,7 @@ void DrawResourcesFiller::drawGridDTM( const float64_t2& topLeft, float64_t height, float64_t width, + float gridCellWidth, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { @@ -594,7 +595,9 @@ void DrawResourcesFiller::drawGridDTM( gridDTMInfo.topLeft = topLeft; gridDTMInfo.height = height; gridDTMInfo.width = width; + gridDTMInfo.gridCellWidth = gridCellWidth; + setActiveDTMSettings(dtmSettingsInfo); beginMainObject(MainObjectType::GRID_DTM); uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h 
index 801dc41c2..1862679af 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -207,6 +207,7 @@ struct DrawResourcesFiller void drawGridDTM(const float64_t2& topLeft, float64_t height, float64_t width, + float gridCellWidth, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 356ff23aa..78f6a5504 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3364,7 +3364,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu else if (mode == ExampleMode::CASE_11) { DTMSettingsInfo dtmInfo{}; - //dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::OUTLINE; dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; dtmInfo.mode |= E_DTM_MODE::CONTOUR; @@ -3435,7 +3435,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - drawResourcesFiller.drawGridDTM({ 0.0f, 200.0f }, 400.0f, 800.0f, dtmInfo, intendedNextSubmit); + drawResourcesFiller.drawGridDTM({ 0.0f, 200.0f }, 400.0f, 800.0f, 40.0f, dtmInfo, intendedNextSubmit); } } diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 538387491..0a0801632 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -8,7 +8,7 @@ #endif // TODO[Erfan]: Turn off in the future, but keep enabled to test -#define NBL_FORCE_EMULATED_FLOAT_64 +// #define NBL_FORCE_EMULATED_FLOAT_64 #include #include @@ -247,9 +247,7 @@ struct GridDTMInfo pfloat64_t height; // 8 bytes (24) pfloat64_t width; // 8 bytes (32) uint32_t textureID; // 4 bytes (36) - uint32_t dtmInfoID; // 4 bytes (40) - float gridCellWidth; // 4 bytes (44) - float _padding; // 4 bytes (48) + float gridCellWidth; // 4 bytes (40) }; static uint32_t packR11G11B10_UNORM(float32_t3 color) diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index ccc30b1b8..c0e44ab29 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ 
b/62_CAD/shaders/main_pipeline/common.hlsl @@ -232,15 +232,17 @@ struct PSInput #endif /* GRID DTM */ - uint getHeightMapTextureID() { return data1.x; } - uint getDTMSettingsID() { return data1.y; } - float getGridDTMScreenSpaceCellWidth() { return data2.x; } + uint getGridDTMHeightTextureID(uint textureID) { return data1.z; } + float getGridDTMScreenSpaceCellWidth() { return data3.x; } float2 getGridDTMScreenSpacePosition() { return interp_data5.zw; } + float2 getGridDTMScreenSpaceTopLeft() { return data2.xy; } + float2 getGridDTMScreenSpaceGridExtents() { return data2.zw; } - void setHeightMapTextureID(uint heightMapTextureID) { data1.x = heightMapTextureID; } - void setDTMSettingsID(uint dtmSettingsID) { data1.y = dtmSettingsID; } - void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data2.x = screenSpaceGridWidth; } + void setGridDTMHeightTextureID(uint textureID) { data1.z = textureID; } + void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data3.x = screenSpaceGridWidth; } void setGridDTMScreenSpacePosition(float2 screenSpacePosition) { interp_data5.zw = screenSpacePosition; } + void setGridDTMScreenSpaceTopLeft(float2 screenSpaceTopLeft) { data2.xy = screenSpaceTopLeft; } + void setGridDTMScreenSpaceGridExtents(float2 screenSpaceGridExtends) { data2.zw = screenSpaceGridExtends; } }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl index 63e1194e4..ee698e19c 100644 --- a/62_CAD/shaders/main_pipeline/dtm.hlsl +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -308,7 +308,7 @@ float4 calculateDTMContourColor(in DTMContourSettings contourSettings, in float3 return float4(0.0f, 0.0f, 0.0f, 0.0f); } -float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in float2 fragPos, in float3 baryCoord, in float height) +float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], 
in float2 fragPos) { float4 outputColor; @@ -362,7 +362,6 @@ float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in minDistance = min(minDistance, distance); } - } outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 766225acd..6f8edc7b0 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -117,6 +117,46 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } +// TODO: move to other header +float4 calculateGridDTMOutlineColor(in uint outlineLineStyleIdx, in nbl::hlsl::shapes::Line outlineLineSegments[2], in float2 fragPos, in float phaseShift) +{ + LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); + const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; + const float stretch = 1.0f; + + // find distance to outline + float minDistance = nbl::hlsl::numeric_limits::max; + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + for (int i = 0; i < 2; ++i) + { + float distance = nbl::hlsl::numeric_limits::max; + distance = ClippedSignedDistance >::sdf(outlineLineSegments[i], fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); + + minDistance = min(minDistance, distance); + } + } + else + { + for (int i = 0; i < 2; ++i) + { + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(outlineLineSegments[i]); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, outlineLineSegments[i], arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(outlineLineSegments[i], 
fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + } + + float4 outputColor; + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); + outputColor.a *= outlineStyle.color.a; + outputColor.rgb = outlineStyle.color.rgb; + + return outputColor; +} + [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -129,7 +169,7 @@ float4 fragMain(PSInput input) : SV_TARGET const MainObject mainObj = loadMainObject(currentMainObjectIdx); if (pc.isDTMRendering) - { + { DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); float3 v[3]; @@ -143,8 +183,8 @@ float4 fragMain(PSInput input) : SV_TARGET float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dtmSettings.drawOutlineEnabled()) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, input.position.xy, baryCoord, height)); + if (dtmSettings.drawOutlineEnabled()) // TODO: do i need 'height' paramter here? + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, v, input.position.xy)); if (dtmSettings.drawContourEnabled()) { for(uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder @@ -393,27 +433,147 @@ float4 fragMain(PSInput input) : SV_TARGET // Query dtm settings // use texture Gather to get 4 corners: https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-to-gather - // A. the outlines can be stippled, use phaseshift of the line such that they started from the grid's origin worldspace coordinate - // B. 
the contours are computed for triangles, use the same function as for dtms, choose between the two triangles based on local UV coords in current cell - // Make it so we can choose which diagonal to use to construct the triangle, it's either u=v or u=1-v - // C. Height shading same as contours (split into two triangles) + // DONE (but needs to be fixed): A. the outlines can be stippled, use phaseshift of the line such that they started from the grid's origin worldspace coordinate + // DONE: B. the contours are computed for triangles, use the same function as for dtms, choose between the two triangles based on local UV coords in current cell + // DONE: Make it so we can choose which diagonal to use to construct the triangle, it's either u=v or u=1-v + // DONE: C. Height shading same as contours (split into two triangles) // Heights can have invalid values (let's say NaN) if a cell corner has NaN value then no triangle (for contour and shading) and no outline should include that corner. (see DTM image in discord with gaps) // TODO: we need to emulate dilation and do sdf of neighbouring cells as well. because contours, outlines and shading can bleed into other cells for AA. 
// [NOTE] Do dilation as last step, when everything else works fine - textureColor = float4(1.0f, 1.0f, 1.0f, 1.0f); - float2 uv = input.getImageUV(); - float scalar = uv.x * uv.x * 0.25f + uv.y * uv.y * 0.25f; - textureColor *= scalar; - localAlpha = 1.0f; + DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + float2 pos = input.getGridDTMScreenSpacePosition(); + + // grid consists of square cells and cells are divided into two triangles: + // depending on mode it is + // either: or: + // v2a-------v1 v0-------v2b + // | A / | | \ B | + // | / | | \ | + // | / B | | A \ | + // v0-------v2b v2a-------v1 + // + + // TODO: probably needs to be a part of grid dtm settings struct + const bool diagonalFromTopLeftToBottomRight = true; + + // calculate screen space coordinates of vertices of the current tiranlge within the grid + float3 v[3]; + nbl::hlsl::shapes::Line outlineLineSegments[2]; + float outlinePhaseShift; + { + float2 topLeft = input.getGridDTMScreenSpaceTopLeft(); + float2 gridExtents = input.getGridDTMScreenSpaceGridExtents(); + float cellWidth = input.getGridDTMScreenSpaceCellWidth(); + float2 uv = input.getImageUV(); + + float2 gridSpacePos = uv * gridExtents; + + float2 cellCoords; + { + float2 gridSpacePosDivGridCellWidth = gridSpacePos / cellWidth; + cellCoords.x = uint32_t(gridSpacePosDivGridCellWidth.x); + cellCoords.y = uint32_t(gridSpacePosDivGridCellWidth.y); + } + + // TODO: do we want to calculate it in the vertex shader? + const float MaxCellCoordX = round(gridExtents.x / cellWidth); + const float MaxCellCoordY = round(gridExtents.y / cellWidth); + + float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? + + const float2 DistancesToTriangleALegs = diagonalFromTopLeftToBottomRight ? min(insideCellCoord.x, insideCellCoord.y) : min(insideCellCoord.x, cellWidth - insideCellCoord.y); + const float2 DistancesToTriangleBLegs = diagonalFromTopLeftToBottomRight ? 
min(cellWidth - insideCellCoord.x, cellWidth - insideCellCoord.y) : min(cellWidth - insideCellCoord.x, insideCellCoord.y); + + float distanceToTriangleAExclusiveCorner = min(DistancesToTriangleALegs.x, DistancesToTriangleALegs.y); + float distanceToTriangleBExclusiveCorner = min(DistancesToTriangleBLegs.x, DistancesToTriangleBLegs.y); + + // my ASCII art above explains which triangle is A and which is B + const bool triangleA = distanceToTriangleAExclusiveCorner <= distanceToTriangleBExclusiveCorner; + + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; - //return outputColor; - printf("uv = %f, %f", uv.x, uv.y); + if (diagonalFromTopLeftToBottomRight) + { + v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f); + v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, 0.0f); + v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, 0.0f) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f); + } + else + { + v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, 0.0f); + v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f); + v[2] = triangleA ? 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, 0.0f); + } + + // TODO: remove when implementing height texture + [unroll] + for (uint i = 0; i < 3; ++i) + { + v[i].z = -20.0f + 5.0f * (v[i].x + v[i].y) / cellWidth; + + //if (abs(round(v[i].z) - 20.0f) <= 0.1f) + // v[i].z = asfloat(0x7FC00000); + + } + + if (isnan(v[0].z) || isnan(v[1].z) || isnan(v[2].z)) + { + discard; + } + + // move from grid space to screen space + [unroll] + for (int i = 0; i < 3; ++i) + v[i].xy += topLeft; + + if (triangleA) + { + outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(v[2].xy, v[0].xy); + outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(v[2].xy, v[1].xy); + } + else + { + outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(v[1].xy, v[2].xy); + outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(v[0].xy, v[2].xy); + } + + // test diagonal draw + //outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(v[0].xy, v[1].xy); + //outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(v[0].xy, v[1].xy); + + + float distancesToVerticalCellSides = min(insideCellCoord.x, cellWidth - insideCellCoord.x); + float distancesToHorizontalCellSides = min(insideCellCoord.y, cellWidth - insideCellCoord.y); + + float patternCellCoord = distancesToVerticalCellSides >= distancesToHorizontalCellSides ? cellCoords.x : cellCoords.y; + + // TODO: calculate pattern length!!! 
+ float patternLength = 30.0f; + outlinePhaseShift = (cellWidth * (1.0f / globals.screenToWorldRatio) * patternCellCoord) / patternLength; + } + + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); + float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; + float2 heightDeriv = fwidth(height); + + float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dtmSettings.drawOutlineEnabled()) + dtmColor = dtm::blendUnder(dtmColor, calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, outlinePhaseShift)); + if (dtmSettings.drawContourEnabled()) + { + for (uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], v, input.position.xy, height)); + } + if (dtmSettings.drawHeightShadingEnabled()) + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); + + textureColor = dtmColor.rgb; + localAlpha = dtmColor.a; } - uint2 fragCoord = uint2(input.position.xy); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index e92a8d33b..f676aa206 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -650,9 +650,8 @@ PSInput main(uint vertexID : SV_VertexID) pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); pfloat64_t height = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); pfloat64_t width = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(pfloat64_t), 8u); - uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + 
drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t), 8u); - uint32_t dtmSettingsID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t), 8u); - float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + 2 * sizeof(uint32_t), 8u); + uint32_t dtmSettingsID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t), 8u); + float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t), 8u); const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); pfloat64_t2 vtxPos = topLeft; @@ -664,10 +663,13 @@ PSInput main(uint vertexID : SV_VertexID) float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); outV.position = float4(ndcVtxPos, 0.0f, 1.0f); - outV.setHeightMapTextureID(textureID); - outV.setDTMSettingsID(dtmSettingsID); - outV.setGridDTMScreenSpaceCellWidth(gridCellWidth); // TODO: is input world space? 
+ outV.setGridDTMScreenSpaceCellWidth(gridCellWidth * globals.screenToWorldRatio); outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); + outV.setGridDTMScreenSpaceTopLeft(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, topLeft)); + pfloat64_t2 gridExtents; + gridExtents.x = width; + gridExtents.y = height; + outV.setGridDTMScreenSpaceGridExtents(gridExtents * globals.screenToWorldRatio); outV.setImageUV(corner); } From f335b2822a3c213a5193f1bb70962e08065a5523 Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 23 May 2025 17:00:51 +0200 Subject: [PATCH 097/129] Improved phase shift calculation --- 62_CAD/DrawResourcesFiller.cpp | 6 ++ 62_CAD/main.cpp | 2 +- 62_CAD/shaders/globals.hlsl | 2 + 62_CAD/shaders/main_pipeline/common.hlsl | 10 ++-- 62_CAD/shaders/main_pipeline/dtm.hlsl | 39 +++++++++++++ .../main_pipeline/fragment_shader.hlsl | 56 ++----------------- .../shaders/main_pipeline/vertex_shader.hlsl | 2 + 7 files changed, 60 insertions(+), 57 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c94529656..483545b2c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -598,6 +598,12 @@ void DrawResourcesFiller::drawGridDTM( gridDTMInfo.width = width; gridDTMInfo.gridCellWidth = gridCellWidth; + if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) + { + const bool isOutlineStippled = dtmSettingsInfo.outlineStyleInfo.stipplePatternSize > 0; + gridDTMInfo.outlineStipplePatternLengthReciprocal = isOutlineStippled ? 
dtmSettingsInfo.outlineStyleInfo.reciprocalStipplePatternLen : 0.0f; + } + setActiveDTMSettings(dtmSettingsInfo); beginMainObject(MainObjectType::GRID_DTM); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 361ce54ee..5b13520ba 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -45,7 +45,7 @@ static constexpr bool DebugModeWireframe = false; static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; -static constexpr bool CacheAndReplay = true; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic +static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic enum class ExampleMode { diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index b6385a0bf..cd88773f1 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -260,6 +260,8 @@ struct GridDTMInfo pfloat64_t width; // 8 bytes (32) uint32_t textureID; // 4 bytes (36) float gridCellWidth; // 4 bytes (40) + float outlineStipplePatternLengthReciprocal; // 4 bytes (44) + float _padding; // 4 bytes (48) }; static uint32_t packR11G11B10_UNORM(float32_t3 color) diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index c0e44ab29..e492fe4ec 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -233,16 +233,18 @@ struct PSInput /* GRID DTM */ uint getGridDTMHeightTextureID(uint textureID) { return data1.z; } - float getGridDTMScreenSpaceCellWidth() { return data3.x; } - float2 getGridDTMScreenSpacePosition() { return interp_data5.zw; } float2 getGridDTMScreenSpaceTopLeft() { return data2.xy; } float2 getGridDTMScreenSpaceGridExtents() { return data2.zw; } + float getGridDTMScreenSpaceCellWidth() { 
return data3.x; } + float getGridDTMOutlineStipplePatternLengthReciprocal() { return data3.y; } + float2 getGridDTMScreenSpacePosition() { return interp_data5.zw; } void setGridDTMHeightTextureID(uint textureID) { data1.z = textureID; } - void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data3.x = screenSpaceGridWidth; } - void setGridDTMScreenSpacePosition(float2 screenSpacePosition) { interp_data5.zw = screenSpacePosition; } void setGridDTMScreenSpaceTopLeft(float2 screenSpaceTopLeft) { data2.xy = screenSpaceTopLeft; } void setGridDTMScreenSpaceGridExtents(float2 screenSpaceGridExtends) { data2.zw = screenSpaceGridExtends; } + void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data3.x = screenSpaceGridWidth; } + void setGridDTMOutlineStipplePatternLengthReciprocal(float outlineStipplePatternLength) { data3.y = outlineStipplePatternLength; } + void setGridDTMScreenSpacePosition(float2 screenSpacePosition) { interp_data5.zw = screenSpacePosition; } }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl index ee698e19c..839b5483e 100644 --- a/62_CAD/shaders/main_pipeline/dtm.hlsl +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -371,6 +371,45 @@ float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float3 v[3], in return outputColor; } +float4 calculateGridDTMOutlineColor(in uint outlineLineStyleIdx, in nbl::hlsl::shapes::Line outlineLineSegments[2], in float2 fragPos, in float phaseShift) +{ + LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); + const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; + const float stretch = 1.0f; + + // find distance to outline + float minDistance = nbl::hlsl::numeric_limits::max; + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + for (int i = 
0; i < 2; ++i) + { + float distance = nbl::hlsl::numeric_limits::max; + distance = ClippedSignedDistance >::sdf(outlineLineSegments[i], fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); + + minDistance = min(minDistance, distance); + } + } + else + { + for (int i = 0; i < 2; ++i) + { + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(outlineLineSegments[i]); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, outlineLineSegments[i], arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(outlineLineSegments[i], fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + } + + float4 outputColor; + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); + outputColor.a *= outlineStyle.color.a; + outputColor.rgb = outlineStyle.color.rgb; + + return outputColor; +} + float4 blendUnder(in float4 dstColor, in float4 srcColor) { dstColor.rgb = dstColor.rgb + (1 - dstColor.a) * srcColor.a * srcColor.rgb; diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 6d3a20ddc..7f72c5d2e 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -117,46 +117,6 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } -// TODO: move to other header -float4 calculateGridDTMOutlineColor(in uint outlineLineStyleIdx, in nbl::hlsl::shapes::Line outlineLineSegments[2], in float2 fragPos, in float phaseShift) -{ - LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); - const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth * globals.screenToWorldRatio) * 0.5f; - 
const float stretch = 1.0f; - - // find distance to outline - float minDistance = nbl::hlsl::numeric_limits::max; - if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) - { - for (int i = 0; i < 2; ++i) - { - float distance = nbl::hlsl::numeric_limits::max; - distance = ClippedSignedDistance >::sdf(outlineLineSegments[i], fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); - - minDistance = min(minDistance, distance); - } - } - else - { - for (int i = 0; i < 2; ++i) - { - float distance = nbl::hlsl::numeric_limits::max; - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(outlineLineSegments[i]); - LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, outlineLineSegments[i], arcLenCalc, phaseShift, stretch, globals.worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(outlineLineSegments[i], fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); - - minDistance = min(minDistance, distance); - } - } - - float4 outputColor; - outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); - outputColor.a *= outlineStyle.color.a; - outputColor.rgb = outlineStyle.color.rgb; - - return outputColor; -} - [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -511,18 +471,10 @@ float4 fragMain(PSInput input) : SV_TARGET // TODO: remove when implementing height texture [unroll] for (uint i = 0; i < 3; ++i) - { v[i].z = -20.0f + 5.0f * (v[i].x + v[i].y) / cellWidth; - //if (abs(round(v[i].z) - 20.0f) <= 0.1f) - // v[i].z = asfloat(0x7FC00000); - - } - if (isnan(v[0].z) || isnan(v[1].z) || isnan(v[2].z)) - { discard; - } // move from grid space to screen space [unroll] @@ -550,9 +502,9 @@ float4 fragMain(PSInput input) : SV_TARGET float patternCellCoord = distancesToVerticalCellSides >= distancesToHorizontalCellSides ? 
cellCoords.x : cellCoords.y; - // TODO: calculate pattern length!!! - float patternLength = 30.0f; - outlinePhaseShift = (cellWidth * (1.0f / globals.screenToWorldRatio) * patternCellCoord) / patternLength; + float reciprocalPatternLength = input.getGridDTMOutlineStipplePatternLengthReciprocal(); + if(reciprocalPatternLength > 0.0f) + outlinePhaseShift = (cellWidth * (1.0f / globals.screenToWorldRatio) * patternCellCoord) * reciprocalPatternLength; } const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); @@ -561,7 +513,7 @@ float4 fragMain(PSInput input) : SV_TARGET float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); if (dtmSettings.drawOutlineEnabled()) - dtmColor = dtm::blendUnder(dtmColor, calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, outlinePhaseShift)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, outlinePhaseShift)); if (dtmSettings.drawContourEnabled()) { for (uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 97d438fb2..1074cc265 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -652,6 +652,7 @@ PSInput main(uint vertexID : SV_VertexID) pfloat64_t width = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(pfloat64_t), 8u); uint32_t dtmSettingsID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t), 8u); float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t), 8u); + float 
reciprocalOutlineStipplePatternLength = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t) + sizeof(float), 8u); const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); pfloat64_t2 vtxPos = topLeft; @@ -671,6 +672,7 @@ PSInput main(uint vertexID : SV_VertexID) gridExtents.y = height; outV.setGridDTMScreenSpaceGridExtents(gridExtents * globals.screenToWorldRatio); outV.setImageUV(corner); + outV.setGridDTMOutlineStipplePatternLengthReciprocal(reciprocalOutlineStipplePatternLength); } else if (objType == ObjectType::STREAMED_IMAGE) { From 43f4dd616c0f0b05ba58393ec138c28ad72a3a07 Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 23 May 2025 18:31:43 +0200 Subject: [PATCH 098/129] Added a todo comment --- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 7f72c5d2e..36fa3abf4 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -398,7 +398,7 @@ float4 fragMain(PSInput input) : SV_TARGET // DONE: Make it so we can choose which diagonal to use to construct the triangle, it's either u=v or u=1-v // DONE: C. Height shading same as contours (split into two triangles) - // Heights can have invalid values (let's say NaN) if a cell corner has NaN value then no triangle (for contour and shading) and no outline should include that corner. (see DTM image in discord with gaps) + // DONE (but needs to be tested after i implement texture height maps) Heights can have invalid values (let's say NaN) if a cell corner has NaN value then no triangle (for contour and shading) and no outline should include that corner. (see DTM image in discord with gaps) // TODO: we need to emulate dilation and do sdf of neighbouring cells as well. 
because contours, outlines and shading can bleed into other cells for AA. // [NOTE] Do dilation as last step, when everything else works fine From 81a5c4356aa0c36a5d776db77a37bb0b346396c5 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 23 May 2025 23:22:44 -0300 Subject: [PATCH 099/129] Add test for cache copy --- 21_LRUCacheUnitTest/main.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/21_LRUCacheUnitTest/main.cpp b/21_LRUCacheUnitTest/main.cpp index 1e7830b16..467c6d4e4 100644 --- a/21_LRUCacheUnitTest/main.cpp +++ b/21_LRUCacheUnitTest/main.cpp @@ -203,6 +203,16 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL counter++; } + // Cache copy test + ResizableLRUCache cache4Copy(cache4); + for (auto it = cache4.cbegin(), itCopy = cache4Copy.cbegin(); it != cache4.cend(); it++, itCopy++) + { + assert(*it == *itCopy); + // Assert deep copy + assert(it.operator->() != itCopy.operator->()); + + } + // Besides the disposal function that gets called when evicting, we need to check that the Cache properly destroys all resident `Key,Value` pairs when destroyed struct Foo { @@ -236,10 +246,8 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL cache5.insert(i, Foo(&destroyCounter)); int x = 0; } - assert(destroyCounter == 10); - m_logger->log("all good"); m_textureLRUCache = std::unique_ptr(new TextureLRUCache(1024u)); From 6999e3674052eb3dcb6a29e9d651c862b4c4d215 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sat, 24 May 2025 18:47:18 +0400 Subject: [PATCH 100/129] ensure multiple static images availability --- 62_CAD/DrawResourcesFiller.cpp | 20 +++++++++++++++++++- 62_CAD/DrawResourcesFiller.h | 25 +++++++++++++++++++++---- 62_CAD/main.cpp | 2 +- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 483545b2c..7ece534cf 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ 
b/62_CAD/DrawResourcesFiller.cpp @@ -360,8 +360,11 @@ void DrawResourcesFiller::drawFontGlyph( } } -bool DrawResourcesFiller::ensureStaticImageAvailability(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit) { + const auto& imageID = staticImage.imageID; + const auto& cpuImage = staticImage.cpuImage; + // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; @@ -447,6 +450,21 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(image_id imageID, const return cachedImageRecord->arrayIndex != InvalidTextureIndex; } +bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit) +{ + for (auto& staticImage : staticImages) + { + if (!ensureStaticImageAvailability(staticImage, intendedNextSubmit)) + return false; // failed ensuring a single staticImage is available, shouldn't happen unless the image is larger than the memory arena allocated for images. + } + for (auto& staticImage : staticImages) + { + if (imagesCache->peek(staticImage.imageID) == nullptr) + return false; // this means one of the images evicted another, most likely due to VRAM limitations not all images can be resident all at once. 
+ } + return true; +} + bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) { auto* device = m_utilities->getLogicalDevice(); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 0b7a8cf01..18c09f83e 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -216,6 +216,12 @@ struct DrawResourcesFiller const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); + struct StaticImageInfo + { + image_id imageID; + core::smart_refctd_ptr cpuImage; + }; + /** * @brief Adds a static 2D image to the draw resource set for rendering. * @@ -230,8 +236,7 @@ struct DrawResourcesFiller * - Queues the image for uploading via staging in the next submit. * - If memory is constrained, attempts to evict other images to free up space. * - * @param imageID Unique identifier for the image resource. - * @param cpuImage The CPU-side image resource to (possibly) upload. + * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. * * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. @@ -240,9 +245,21 @@ struct DrawResourcesFiller * * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. - * @return true if the image was successfully cached and is ready for use; false if allocation failed. + * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. 
*/ - bool ensureStaticImageAvailability(image_id imageID, const core::smart_refctd_ptr& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Adds multiple static 2D image to the draw resource set for rendering. + * + * This function should theoratically succeed if the size of staticImages is less that max descriptor slots and more importantly if all of the images can fit in the images memory arena (using the GeneralPurposeAddressAllocatoe) + * There is a low chance that failure might be due to fragmentation of images memory allocator (GPAA), in which case clearing the cache and retrying MIGHT work. + * + * @return true if all of them are successfully cache and available for rendering + * @return false if the images couldn't be resident all at once. // TODO: maybe return something about which ones are available. + */ + bool ensureStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); + /** * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. 
diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 5b13520ba..c5123473b 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3059,7 +3059,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from //printf(std::format("\n Image {} \n", i).c_str()); - drawResourcesFiller.ensureStaticImageAvailability(imageID, sampleImages[i], intendedNextSubmit); + drawResourcesFiller.ensureStaticImageAvailability({ imageID, sampleImages[i] }, intendedNextSubmit); drawResourcesFiller.addImageObject(imageID, { .topLeft = { 0.0 + (i) * 3.0, 0.0 }, .dirU = { 3.0 , 0.0 }, .aspectRatio = 1.0 }, intendedNextSubmit); //printf("\n"); } From 48864b79128cfbd8bfe2989287e726b9fe070063 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sat, 24 May 2025 18:48:03 +0400 Subject: [PATCH 101/129] forgot to push function rename --- 62_CAD/DrawResourcesFiller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 18c09f83e..ba8cb80de 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -258,7 +258,7 @@ struct DrawResourcesFiller * @return true if all of them are successfully cache and available for rendering * @return false if the images couldn't be resident all at once. // TODO: maybe return something about which ones are available. 
*/ - bool ensureStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); /** From 96cc801291537b119e053342cf42e29ec1c0fd26 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sat, 24 May 2025 18:52:47 +0400 Subject: [PATCH 102/129] documentation for `ensureMultipleStaticImagesAvailability` --- 62_CAD/DrawResourcesFiller.cpp | 3 +++ 62_CAD/DrawResourcesFiller.h | 29 ++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 7ece534cf..9fa24e1a0 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -452,6 +452,9 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit) { + if (staticImages.size() > ImagesBindingArraySize) + return false; + for (auto& staticImage : staticImages) { if (!ensureStaticImageAvailability(staticImage, intendedNextSubmit)) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index ba8cb80de..04bc08df3 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -246,21 +246,32 @@ struct DrawResourcesFiller * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. 
- */ + */ bool ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit); /** - * @brief Adds multiple static 2D image to the draw resource set for rendering. - * - * This function should theoratically succeed if the size of staticImages is less that max descriptor slots and more importantly if all of the images can fit in the images memory arena (using the GeneralPurposeAddressAllocatoe) - * There is a low chance that failure might be due to fragmentation of images memory allocator (GPAA), in which case clearing the cache and retrying MIGHT work. - * - * @return true if all of them are successfully cache and available for rendering - * @return false if the images couldn't be resident all at once. // TODO: maybe return something about which ones are available. + * @brief Ensures that multiple static 2D images are resident and ready for rendering. + * + * Attempts to make all provided static images GPU-resident by calling `ensureStaticImageAvailability` + * for each. Afterward, it verifies that none of the newly ensured images have been evicted, + * which could happen due to limited VRAM or memory fragmentation. + * + * This function is expected to succeed if: + * - The number of images does not exceed `ImagesBindingArraySize`. + * - Each image individually fits into the image memory arena. + * - There is enough VRAM to hold all images simultaneously. + * + * @param staticImages A span of StaticImageInfo structures describing the images to be ensured. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @return true If all images were successfully made resident and none were evicted during the process. + * @return false If: + * - The number of images exceeds the descriptor binding array size. + * - Any individual image could not be made resident (e.g., larger than the allocator can support). 
+ * - Some images were evicted due to VRAM pressure or allocator fragmentation, in which case Clearing the image cache and retrying MIGHT be a success (TODO: handle internally) */ bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); - /** * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. * From 280d119e6435928496ac69da91782e60ddea5dca Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 26 May 2025 13:52:16 +0400 Subject: [PATCH 103/129] Fixes to images cache regarding georeferenced image resize --- 62_CAD/DrawResourcesFiller.cpp | 37 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 9fa24e1a0..9b0bdfaac 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -433,6 +433,9 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); cachedImageRecord->arrayIndex = InvalidTextureIndex; } + + // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(imageID); } } else @@ -579,6 +582,9 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); cachedImageRecord->arrayIndex = InvalidTextureIndex; } + + // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(imageID); } } else @@ -2056,13 +2062,15 @@ void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const Cach } } 
-DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName) +DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName) { ImageAllocateResults ret = {}; auto* device = m_utilities->getLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + bool alreadyBlockedForDeferredFrees = false; + // Attempt to create a GPU image and corresponding image view for this texture. // If creation or memory allocation fails (likely due to VRAM exhaustion), // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-cachedImageRecord image remains. @@ -2150,22 +2158,31 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllo } // Getting here means we failed creating or allocating the image, evict and retry. - if (imagesCache->size() == 1u) + + + // If imageCache size is 1 it means there is nothing else to evict, but there may still be already evicts/frees queued up. + // `cull_frees` will make sure all pending deallocations will be blocked for. + if (imagesCache->size() == 1u && alreadyBlockedForDeferredFrees) { - // Nothing else to evict; give up. - // We probably have evicted almost every other texture except the one we just allocated an index for + // We give up, it's really nothing we can do, no image to evict (alreadyBlockedForDeferredFrees==1) and no more memory to free up (alreadyBlockedForDeferredFrees). + // We probably have evicted almost every other texture except the one we just allocated an index for. 
+ // This is most likely due to current image memory requirement being greater than the whole memory allocated for all images _NBL_DEBUG_BREAK_IF(true); + // TODO[LOG] break; } - assert(imagesCache->size() > 1u); + if (imagesCache->size() > 1u) + { + const image_id evictionCandidate = imagesCache->select_eviction_candidate(); + CachedImageRecord* imageRef = imagesCache->peek(evictionCandidate); + if (imageRef) + evictImage_SubmitIfNeeded(evictionCandidate, *imageRef, intendedNextSubmit); + imagesCache->erase(evictionCandidate); + } - const image_id evictionCandidate = imagesCache->select_eviction_candidate(); - CachedImageRecord* imageRef = imagesCache->peek(evictionCandidate); - if (imageRef) - evictImage_SubmitIfNeeded(evictionCandidate, *imageRef, intendedNextSubmit); - imagesCache->erase(evictionCandidate); while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. + alreadyBlockedForDeferredFrees = true; // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image last ref and it's dedicated allocation. 
From 0b693638c1f68f13229ad103d10bc4791de2c13b Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 26 May 2025 15:49:40 +0400 Subject: [PATCH 104/129] edits after LRUCache Improvement --- 62_CAD/DrawResourcesFiller.cpp | 23 ++++++++--------------- 62_CAD/Images.h | 9 +-------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 9b0bdfaac..1d6e95e66 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -707,9 +707,6 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit bool replayCacheFullyCovered = true; for (auto& [imageID, toReplayRecord] : *currentReplayCache->imagesCache) { - // TODO: remove temoprary const_cast workaround. - CachedImageRecord& toReplayImageRecord_nonConst = const_cast(toReplayRecord); - if (toReplayRecord.type != ImageType::STATIC) // non-static images (Georeferenced) won't be replayed like this continue; @@ -731,7 +728,7 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit // if already resident, just update the state to the cached state (to make sure it doesn't get issued for upload again) and move on. 
if (alreadyResident) { - toReplayImageRecord_nonConst.state = cachedRecord->state; // update the toReplayImageRecords's state, to completely match the currently resident state + toReplayRecord.state = cachedRecord->state; // update the toReplayImageRecords's state, to completely match the currently resident state continue; } @@ -764,8 +761,8 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit if (newGPUImageView) { successCreateNewImage = true; - toReplayImageRecord_nonConst.gpuImageView = newGPUImageView; - toReplayImageRecord_nonConst.state = ImageState::CREATED_AND_MEMORY_BOUND; + toReplayRecord.gpuImageView = newGPUImageView; + toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; newGPUImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); } @@ -781,8 +778,9 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit } // Our actual `imageCache` (which represents GPU state) didn't cover the replayCache fully, so new images had to be created, bound to memory. and they need to be written into their respective descriptor array indices again. + // imagesCache = std::make_unique(*currentReplayCache->imagesCache); imagesCache->clear(); - for (auto it = currentReplayCache->imagesCache->crbegin(); it != currentReplayCache->imagesCache->crend(); it++) + for (auto it = currentReplayCache->imagesCache->rbegin(); it != currentReplayCache->imagesCache->rend(); it++) imagesCache->base_t::insert(it->first, it->second); if (!replayCacheFullyCovered) @@ -903,12 +901,7 @@ std::unique_ptr DrawResourcesFiller::createRep stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. 
ret->drawCallsData = drawCalls; ret->activeMainObjectIndex = activeMainObjectIndex; - ret->imagesCache = std::unique_ptr(new ImagesCache(imagesCache->size())); - // It should be copyable, here is a temporary hack: - for (auto it = imagesCache->crbegin(); it != imagesCache->crend(); it++) - { - ret->imagesCache->base_t::insert(it->first, it->second); - } + ret->imagesCache = std::unique_ptr(new ImagesCache(*imagesCache)); return ret; } @@ -1138,7 +1131,7 @@ bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) descriptorWrite.info = &descriptorInfos[descriptorWriteCount]; descriptorWrites[descriptorWriteCount] = descriptorWrite; - const_cast(record).state = ImageState::BOUND_TO_DESCRIPTOR_SET; + record.state = ImageState::BOUND_TO_DESCRIPTOR_SET; descriptorWriteCount++; } @@ -1157,7 +1150,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN for (auto& [id, record] : imagesCache) { if (record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA) - nonResidentImageRecords.push_back(const_cast(&record)); // TODO: remove const_cast + nonResidentImageRecords.push_back(&record); } if (nonResidentImageRecords.size() > 0ull) diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 73be7ed50..ed09da9d6 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -163,14 +163,7 @@ class ImagesCache : public core::ResizableLRUCache template EvictionCallback> inline CachedImageRecord* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) { - auto lruEvictionCallback = [&](const CachedImageRecord& evicted) - { - const image_id* evictingKey = base_t::get_least_recently_used(); - assert(evictingKey != nullptr); - if (evictingKey) - evictCallback(*evictingKey, evicted); - }; - return base_t::insert(imageID, lastUsedSema, lruEvictionCallback); + return base_t::insert(imageID, lastUsedSema, evictCallback); } // Retrieves the image associated with 
`imageID`, updating its LRU position. From d33f32e5b7370bc3e87488ae455a8e74419deba1 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 26 May 2025 16:30:44 +0200 Subject: [PATCH 105/129] Corrections --- 62_CAD/DrawResourcesFiller.cpp | 8 +++++++- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 11 ++++------- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 15 +++++++-------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 1d6e95e66..8b440edf7 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -637,7 +637,13 @@ void DrawResourcesFiller::drawGridDTM( uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); assert(mainObjectIdx != InvalidMainObjectIdx); - addGridDTM_Internal(gridDTMInfo, mainObjectIdx); + if (!addGridDTM_Internal(gridDTMInfo, mainObjectIdx)) + { + // single grid DTM couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); + bool success = addGridDTM_Internal(gridDTMInfo, mainObjectIdx); + assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single GridDTMInfo + } endMainObject(); } diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 36fa3abf4..7738b169b 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -443,15 +443,12 @@ float4 fragMain(PSInput input) : SV_TARGET const float MaxCellCoordY = round(gridExtents.y / cellWidth); float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? - - const float2 DistancesToTriangleALegs = diagonalFromTopLeftToBottomRight ? 
min(insideCellCoord.x, insideCellCoord.y) : min(insideCellCoord.x, cellWidth - insideCellCoord.y); - const float2 DistancesToTriangleBLegs = diagonalFromTopLeftToBottomRight ? min(cellWidth - insideCellCoord.x, cellWidth - insideCellCoord.y) : min(cellWidth - insideCellCoord.x, insideCellCoord.y); - - float distanceToTriangleAExclusiveCorner = min(DistancesToTriangleALegs.x, DistancesToTriangleALegs.y); - float distanceToTriangleBExclusiveCorner = min(DistancesToTriangleBLegs.x, DistancesToTriangleBLegs.y); // my ASCII art above explains which triangle is A and which is B - const bool triangleA = distanceToTriangleAExclusiveCorner <= distanceToTriangleBExclusiveCorner; + const bool triangleA = diagonalFromTopLeftToBottomRight ? + insideCellCoord.x < cellWidth - insideCellCoord.y : + insideCellCoord.x < insideCellCoord.y; + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 1074cc265..0624c159d 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -655,11 +655,13 @@ PSInput main(uint vertexID : SV_VertexID) float reciprocalOutlineStipplePatternLength = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t) + sizeof(float), 8u); const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + pfloat64_t2 gridExtents; + gridExtents.x = width; + gridExtents.y = -height; + pfloat64_t2 vtxPos = topLeft; - if (corner.x) - vtxPos.x = vtxPos.x + width; - if (corner.y) - vtxPos.y = vtxPos.y - height; + vtxPos = vtxPos + corner * gridExtents; + gridExtents.y = -gridExtents.y; float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); outV.position = float4(ndcVtxPos, 0.0f, 1.0f); @@ -667,10 +669,7 @@ PSInput main(uint vertexID : SV_VertexID) 
outV.setGridDTMScreenSpaceCellWidth(gridCellWidth * globals.screenToWorldRatio); outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); outV.setGridDTMScreenSpaceTopLeft(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, topLeft)); - pfloat64_t2 gridExtents; - gridExtents.x = width; - gridExtents.y = height; - outV.setGridDTMScreenSpaceGridExtents(gridExtents * globals.screenToWorldRatio); + outV.setGridDTMScreenSpaceGridExtents(_static_cast(gridExtents) * globals.screenToWorldRatio); outV.setImageUV(corner); outV.setGridDTMOutlineStipplePatternLengthReciprocal(reciprocalOutlineStipplePatternLength); } From 2f743b5a9f9c1f83e2c0b5e6aae62f632416c888 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 27 May 2025 16:06:31 +0200 Subject: [PATCH 106/129] Implemented height map --- 62_CAD/DrawResourcesFiller.cpp | 4 +- 62_CAD/DrawResourcesFiller.h | 3 +- 62_CAD/main.cpp | 30 +++++++++++++-- 62_CAD/shaders/main_pipeline/common.hlsl | 2 +- .../main_pipeline/fragment_shader.hlsl | 37 +++++++++++-------- .../shaders/main_pipeline/vertex_shader.hlsl | 3 +- 6 files changed, 55 insertions(+), 24 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 8b440edf7..c81f781bf 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -613,9 +613,10 @@ bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, // We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) void DrawResourcesFiller::drawGridDTM( const float64_t2& topLeft, - float64_t height, float64_t width, + float64_t height, float gridCellWidth, + uint64_t textureID, const DTMSettingsInfo& dtmSettingsInfo, 
SIntendedSubmitInfo& intendedNextSubmit) { @@ -624,6 +625,7 @@ void DrawResourcesFiller::drawGridDTM( gridDTMInfo.height = height; gridDTMInfo.width = width; gridDTMInfo.gridCellWidth = gridCellWidth; + gridDTMInfo.textureID = getImageIndexFromID(textureID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) { diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 04bc08df3..619257e2c 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -210,9 +210,10 @@ struct DrawResourcesFiller SIntendedSubmitInfo& intendedNextSubmit); void drawGridDTM(const float64_t2& topLeft, - float64_t height, float64_t width, + float64_t height, float gridCellWidth, + uint64_t textureID, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index c5123473b..7114c1a06 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1153,7 +1153,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu "../../media/color_space_test/R8G8B8A8_1.png", }; - for (const auto& imagePath : imagePaths) + auto loadImage = [&](const std::string& imagePath) -> smart_refctd_ptr { constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); @@ -1162,6 +1162,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if (contents.empty()) { m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); + return nullptr; } smart_refctd_ptr cpuImgView; @@ -1195,6 +1196,7 @@ class ComputerAidedDesign final : public 
examples::SimpleWindowedApplication, pu break; default: m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + return nullptr; } @@ -1243,14 +1245,24 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu promotedCPUImage->setBufferAndRegions(std::move(promotedCPUImageBuffer), newRegions); performImageFormatPromotionCopy(loadedCPUImage, promotedCPUImage); - sampleImages.push_back(promotedCPUImage); + return promotedCPUImage; } else { - sampleImages.push_back(loadedCPUImage); + return loadedCPUImage; } + }; + + for (const auto& imagePath : imagePaths) + { + auto image = loadImage(imagePath); + if (image) + sampleImages.push_back(image); } + gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); + assert(gridDTMHeightMap); + return true; } @@ -3598,7 +3610,16 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - drawResourcesFiller.drawGridDTM({ 0.0f, 200.0f }, 400.0f, 800.0f, 40.0f, dtmInfo, intendedNextSubmit); + constexpr float HeightMapCellWidth = 50.0f; + const auto heightMapExtent = gridDTMHeightMap->getCreationParameters().extent; + assert(heightMapExtent.width > 0 && heightMapExtent.height > 0); + const float heightMapWidth = (heightMapExtent.width - 1) * HeightMapCellWidth; + const float heightMapHeight = (heightMapExtent.height - 1) * HeightMapCellWidth; + + const uint64_t heightMapTextureID = 0ull; + if (!drawResourcesFiller.ensureStaticImageAvailability({ heightMapTextureID, gridDTMHeightMap }, intendedNextSubmit)) + m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); + drawResourcesFiller.drawGridDTM({ 0.0f, 200.0f }, heightMapWidth, heightMapHeight, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); } } @@ -3673,6 +3694,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::vector> m_shapeMSDFImages = {}; std::vector> sampleImages; + 
smart_refctd_ptr gridDTMHeightMap; static constexpr char FirstGeneratedCharacter = ' '; static constexpr char LastGeneratedCharacter = '~'; diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index e492fe4ec..79dbc0bd1 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -232,7 +232,7 @@ struct PSInput #endif /* GRID DTM */ - uint getGridDTMHeightTextureID(uint textureID) { return data1.z; } + uint getGridDTMHeightTextureID() { return data1.z; } float2 getGridDTMScreenSpaceTopLeft() { return data2.xy; } float2 getGridDTMScreenSpaceGridExtents() { return data2.zw; } float getGridDTMScreenSpaceCellWidth() { return data3.x; } diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 7738b169b..081a4ef16 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -404,7 +404,10 @@ float4 fragMain(PSInput input) : SV_TARGET // [NOTE] Do dilation as last step, when everything else works fine DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + float2 pos = input.getGridDTMScreenSpacePosition(); + float2 uv = input.getImageUV(); + const uint32_t textureId = input.getGridDTMHeightTextureID(); // grid consists of square cells and cells are divided into two triangles: // depending on mode it is @@ -438,37 +441,39 @@ float4 fragMain(PSInput input) : SV_TARGET cellCoords.y = uint32_t(gridSpacePosDivGridCellWidth.y); } - // TODO: do we want to calculate it in the vertex shader? - const float MaxCellCoordX = round(gridExtents.x / cellWidth); - const float MaxCellCoordY = round(gridExtents.y / cellWidth); - float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? // my ASCII art above explains which triangle is A and which is B const bool triangleA = diagonalFromTopLeftToBottomRight ? 
insideCellCoord.x < cellWidth - insideCellCoord.y : insideCellCoord.x < insideCellCoord.y; - float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; + const float InvalidHeightValue = asfloat(0x7FC00000); + float4 cellHeights = float4(InvalidHeightValue, InvalidHeightValue, InvalidHeightValue, InvalidHeightValue); + if (textureId != InvalidTextureIndex) + { + const float2 maxCellCoords = float2(round(gridExtents.x / cellWidth), round(gridExtents.y / cellWidth)); + const float2 location = (cellCoords + float2(0.5f, 0.5f)) / maxCellCoords; + + cellHeights = textures[NonUniformResourceIndex(textureId)].Gather(textureSampler, float2(location.x, location.y), 0); + if (cellHeights.x == 100.0f) + printf("uv = { %f, %f }cellHeights = { %f, %f, %f, %f }", location.x, location.y, cellHeights.x, cellHeights.y, cellHeights.z, cellHeights.w); + } + if (diagonalFromTopLeftToBottomRight) { - v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f); - v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, 0.0f); - v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, 0.0f) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f); + v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x); + v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); + v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); } else { - v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, 0.0f); - v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f); - v[2] = triangleA ? 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, 0.0f) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, 0.0f); + v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w); + v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); + v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); } - - // TODO: remove when implementing height texture - [unroll] - for (uint i = 0; i < 3; ++i) - v[i].z = -20.0f + 5.0f * (v[i].x + v[i].y) / cellWidth; if (isnan(v[0].z) || isnan(v[1].z) || isnan(v[2].z)) discard; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 0624c159d..7fd533439 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -650,7 +650,7 @@ PSInput main(uint vertexID : SV_VertexID) pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); pfloat64_t height = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); pfloat64_t width = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(pfloat64_t), 8u); - uint32_t dtmSettingsID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t), 8u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t), 8u); float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t), 8u); float 
reciprocalOutlineStipplePatternLength = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t) + sizeof(float), 8u); @@ -666,6 +666,7 @@ PSInput main(uint vertexID : SV_VertexID) float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); outV.position = float4(ndcVtxPos, 0.0f, 1.0f); + outV.setGridDTMHeightTextureID(textureID); outV.setGridDTMScreenSpaceCellWidth(gridCellWidth * globals.screenToWorldRatio); outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); outV.setGridDTMScreenSpaceTopLeft(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, topLeft)); From a1c6dd1a16bb09a76e43525c091a8561111ee16d Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Wed, 28 May 2025 09:18:42 +0400 Subject: [PATCH 107/129] forceUpdate in ensureStaticImageAvailability --- 62_CAD/DrawResourcesFiller.cpp | 49 +++++++++++++++++++++++++++------- 62_CAD/DrawResourcesFiller.h | 11 +++----- 62_CAD/Images.h | 8 ++++++ 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c81f781bf..bbdb4eb7c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -362,15 +362,44 @@ void DrawResourcesFiller::drawFontGlyph( bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit) { - const auto& imageID = staticImage.imageID; - const auto& cpuImage = staticImage.cpuImage; - // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. 
auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + CachedImageRecord* cachedImageRecord = imagesCache->insert(staticImage.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + if (cachedImageRecord->arrayIndex != InvalidTextureIndex && staticImage.forceUpdate) + { + // found in cache, and we want to force new data into the image + if (cachedImageRecord->staticCPUImage) + { + const auto cachedImageParams = cachedImageRecord->staticCPUImage->getCreationParameters(); + const auto newImageParams = staticImage.cpuImage->getCreationParameters(); + const bool needsRecreation = newImageParams != cachedImageParams; + if (needsRecreation) + { + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena along with it's allocated array slot from the suballocated descriptor set + evictCallback(staticImage.imageID, *cachedImageRecord); + + // Instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + // imagesCache->erase(imageID); + // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + *cachedImageRecord = CachedImageRecord(currentFrameIndex); + } + else + { + // Doesn't need image recreation, we'll use the same array index in descriptor set + the same bound memory. + // reset it's state + update the cpu image used for copying. 
+ cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->staticCPUImage = staticImage.cpuImage; + } + } + else + { + // TODO[LOG]: ? found static image has empty cpu image, shouldn't happen + } + } + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx if (cachedImageRecord->arrayIndex == InvalidTextureIndex) @@ -386,7 +415,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); IGPUImage::SCreationParams imageParams = {}; - imageParams = cpuImage->getCreationParameters(); + imageParams = staticImage.cpuImage->getCreationParameters(); imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; // promote format because RGB8 and friends don't actually exist in HW { @@ -398,7 +427,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s } // Attempt to create a GPU image and image view for this texture. 
- ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, intendedNextSubmit, std::to_string(imageID)); + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, intendedNextSubmit, std::to_string(staticImage.imageID)); if (allocResults.isValid()) { @@ -408,7 +437,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s cachedImageRecord->allocationOffset = allocResults.allocationOffset; cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; - cachedImageRecord->staticCPUImage = cpuImage; + cachedImageRecord->staticCPUImage = staticImage.cpuImage; } else { @@ -434,8 +463,8 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s cachedImageRecord->arrayIndex = InvalidTextureIndex; } - // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation - imagesCache->erase(imageID); + // erase the entry we failed to allocate an image for, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(staticImage.imageID); } } else @@ -516,7 +545,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; if (needsRecreation) { - // call the eviction callbacl so the currently cached imageID gets eventually deallocated from memory arena. + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. 
evictCallback(imageID, *cachedImageRecord); // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 619257e2c..b12eb8920 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -216,12 +216,6 @@ struct DrawResourcesFiller uint64_t textureID, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); - - struct StaticImageInfo - { - image_id imageID; - core::smart_refctd_ptr cpuImage; - }; /** * @brief Adds a static 2D image to the draw resource set for rendering. @@ -237,8 +231,9 @@ struct DrawResourcesFiller * - Queues the image for uploading via staging in the next submit. * - If memory is constrained, attempts to evict other images to free up space. * - * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. - * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. + * @param staticImage::forceUpdate If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. * * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. 
* If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers diff --git a/62_CAD/Images.h b/62_CAD/Images.h index ed09da9d6..bb7b7d3ae 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -208,3 +208,11 @@ struct StreamedImageCopy core::smart_refctd_ptr srcBuffer; // Make it 'std::future' later? asset::IImage::SBufferCopy region; }; + +// TODO: Rename to StaticImageAvailabilityRequest? +struct StaticImageInfo +{ + image_id imageID = ~0ull; + core::smart_refctd_ptr cpuImage = nullptr; + bool forceUpdate = false; // If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. +}; From 3ccb6f957f978b9f675bce85927e311161e81db2 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 29 May 2025 13:19:34 +0200 Subject: [PATCH 108/129] Implemented nan height value handling --- 62_CAD/DrawResourcesFiller.cpp | 6 +- 62_CAD/DrawResourcesFiller.h | 3 +- 62_CAD/main.cpp | 16 ++-- 62_CAD/shaders/globals.hlsl | 3 +- .../main_pipeline/fragment_shader.hlsl | 85 ++++++++++++++----- .../shaders/main_pipeline/vertex_shader.hlsl | 19 ++--- 6 files changed, 87 insertions(+), 45 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index c81f781bf..a33d16b74 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -613,8 +613,7 @@ bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, // We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) void DrawResourcesFiller::drawGridDTM( const float64_t2& topLeft, - float64_t width, - float64_t height, + float64_t2 worldSpaceExtents, float gridCellWidth, uint64_t textureID, const 
DTMSettingsInfo& dtmSettingsInfo, @@ -622,8 +621,7 @@ void DrawResourcesFiller::drawGridDTM( { GridDTMInfo gridDTMInfo; gridDTMInfo.topLeft = topLeft; - gridDTMInfo.height = height; - gridDTMInfo.width = width; + gridDTMInfo.worldSpaceExtents = worldSpaceExtents; gridDTMInfo.gridCellWidth = gridCellWidth; gridDTMInfo.textureID = getImageIndexFromID(textureID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 619257e2c..09caa3b4f 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -210,8 +210,7 @@ struct DrawResourcesFiller SIntendedSubmitInfo& intendedNextSubmit); void drawGridDTM(const float64_t2& topLeft, - float64_t width, - float64_t height, + float64_t2 worldSpaceExtents, float gridCellWidth, uint64_t textureID, const DTMSettingsInfo& dtmSettingsInfo, diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 7114c1a06..3336f3609 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -77,7 +77,7 @@ constexpr std::array cameraExtents = 600.0, // CASE_8 600.0, // CASE_9 10.0, // CASE_BUG - 600.0 // CASE_11 + 1000.0 // CASE_11 }; constexpr ExampleMode mode = ExampleMode::CASE_11; @@ -3569,6 +3569,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // 2 - DISCRETE_FIXED_LENGTH_INTERVALS // 3 - CONTINOUS_INTERVALS float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + animatedAlpha = 1.0f; switch (m_shadingModeExample) { case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: @@ -3589,8 +3590,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; dtmInfo.heightShadingInfo.isCenteredShading = false; dtmInfo.heightShadingInfo.heightShadingMode = 
E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; - dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); - dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-20.0f, float32_t4(0.0f, 0.5f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 0.7f, 0.0f, animatedAlpha)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); @@ -3610,16 +3611,17 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - constexpr float HeightMapCellWidth = 50.0f; + constexpr float HeightMapCellWidth = 20.0f; const auto heightMapExtent = gridDTMHeightMap->getCreationParameters().extent; assert(heightMapExtent.width > 0 && heightMapExtent.height > 0); - const float heightMapWidth = (heightMapExtent.width - 1) * HeightMapCellWidth; - const float heightMapHeight = (heightMapExtent.height - 1) * HeightMapCellWidth; + float64_t2 worldSpaceExtents; + worldSpaceExtents.x = (heightMapExtent.width - 1) * HeightMapCellWidth; + worldSpaceExtents.y = (heightMapExtent.height - 1) * HeightMapCellWidth; const uint64_t heightMapTextureID = 0ull; if (!drawResourcesFiller.ensureStaticImageAvailability({ heightMapTextureID, gridDTMHeightMap }, intendedNextSubmit)) m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); - drawResourcesFiller.drawGridDTM({ 0.0f, 200.0f }, heightMapWidth, heightMapHeight, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); + drawResourcesFiller.drawGridDTM({ -400.0f, 400.0f }, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, 
intendedNextSubmit); } } diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index cd88773f1..0ff238289 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -256,8 +256,7 @@ struct GeoreferencedImageInfo struct GridDTMInfo { pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) - pfloat64_t height; // 8 bytes (24) - pfloat64_t width; // 8 bytes (32) + pfloat64_t2 worldSpaceExtents; // 16 bytes (32) uint32_t textureID; // 4 bytes (36) float gridCellWidth; // 4 bytes (40) float outlineStipplePatternLengthReciprocal; // 4 bytes (44) diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 081a4ef16..45f64aac6 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -117,6 +117,40 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } +enum E_CELL_DIAGONAL +{ + TOP_LEFT_TO_BOTTOM_RIGHT, + BOTTOM_LEFT_TO_TOP_RIGHT, + INVALID +}; + +E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in float4 cellHeights) +{ + static const E_CELL_DIAGONAL DefaultDiagonal = TOP_LEFT_TO_BOTTOM_RIGHT; + + const bool4 invalidHeights = bool4( + isnan(cellHeights.x), + isnan(cellHeights.y), + isnan(cellHeights.z), + isnan(cellHeights.w) + ); + + int invalidHeightsCount = 0; + for (int i = 0; i < 4; ++i) + invalidHeightsCount += int(invalidHeights[i]); + + if (invalidHeightsCount == 0) + return DefaultDiagonal; + + if (invalidHeightsCount > 1) + return INVALID; + + if (invalidHeights.x || invalidHeights.z) + return TOP_LEFT_TO_BOTTOM_RIGHT; + else + return BOTTOM_LEFT_TO_TOP_RIGHT; +} + [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -419,9 +453,6 @@ float4 fragMain(PSInput input) : SV_TARGET // v0-------v2b v2a-------v1 // - // TODO: probably needs to be a part of grid dtm settings struct - const bool 
diagonalFromTopLeftToBottomRight = true; - // calculate screen space coordinates of vertices of the current tiranlge within the grid float3 v[3]; nbl::hlsl::shapes::Line outlineLineSegments[2]; @@ -443,13 +474,6 @@ float4 fragMain(PSInput input) : SV_TARGET float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? - // my ASCII art above explains which triangle is A and which is B - const bool triangleA = diagonalFromTopLeftToBottomRight ? - insideCellCoord.x < cellWidth - insideCellCoord.y : - insideCellCoord.x < insideCellCoord.y; - - float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; - const float InvalidHeightValue = asfloat(0x7FC00000); float4 cellHeights = float4(InvalidHeightValue, InvalidHeightValue, InvalidHeightValue, InvalidHeightValue); if (textureId != InvalidTextureIndex) @@ -458,24 +482,47 @@ float4 fragMain(PSInput input) : SV_TARGET const float2 location = (cellCoords + float2(0.5f, 0.5f)) / maxCellCoords; cellHeights = textures[NonUniformResourceIndex(textureId)].Gather(textureSampler, float2(location.x, location.y), 0); - if (cellHeights.x == 100.0f) - printf("uv = { %f, %f }cellHeights = { %f, %f, %f, %f }", location.x, location.y, cellHeights.x, cellHeights.y, cellHeights.z, cellHeights.w); } + + const E_CELL_DIAGONAL cellDiagonal = resolveGridDTMCellDiagonal(cellHeights); + const bool diagonalFromTopLeftToBottomRight = cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; + + if (cellDiagonal == E_CELL_DIAGONAL::INVALID) + discard; + + // my ASCII art above explains which triangle is A and which is B + const bool triangleA = diagonalFromTopLeftToBottomRight ? 
+ insideCellCoord.x < insideCellCoord.y : + insideCellCoord.x < cellWidth - insideCellCoord.y; + + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; + + //printf("uv = { %f, %f } diagonalTLtoBR = %i triangleA = %i, insiceCellCoords = { %f, %f }", uv.x, uv.y, int(diagonalFromTopLeftToBottomRight), int(triangleA), insideCellCoord.x / cellWidth, insideCellCoord.y / cellWidth); + if (diagonalFromTopLeftToBottomRight) - { - v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x); - v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); - v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); - } - else { v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w); v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); } + else + { + v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x); + v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); + v[2] = triangleA ? 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); + } + + if (triangleA) + printf("v0 = { %f, %f }, v1 = { %f, %f }, v2 = { %f, %f }", v[0].x, v[0].y, v[1].x, v[1].y, v[2].x, v[2].y); + + bool isTriangleInvalid = isnan(v[0].z) || isnan(v[1].z) || isnan(v[2].z); + bool isCellPartiallyInvalid = isnan(cellHeights.x) || isnan(cellHeights.y) || isnan(cellHeights.z) || isnan(cellHeights.w); + + if (!isTriangleInvalid && isCellPartiallyInvalid) + printf("asdf"); - if (isnan(v[0].z) || isnan(v[1].z) || isnan(v[2].z)) + if (isTriangleInvalid) discard; // move from grid space to screen space diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 7fd533439..cdeea3569 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -648,20 +648,17 @@ PSInput main(uint vertexID : SV_VertexID) else if (objType == ObjectType::GRID_DTM) { pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - pfloat64_t height = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - pfloat64_t width = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(pfloat64_t), 8u); - uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t), 8u); - float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t), 8u); - float reciprocalOutlineStipplePatternLength = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(pfloat64_t) + sizeof(uint32_t) + sizeof(float), 
8u); + pfloat64_t2 worldSpaceExtents = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2), 8u); + float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t), 8u); + float reciprocalOutlineStipplePatternLength = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + sizeof(float), 8u); const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - pfloat64_t2 gridExtents; - gridExtents.x = width; - gridExtents.y = -height; + worldSpaceExtents.y = -worldSpaceExtents.y; pfloat64_t2 vtxPos = topLeft; - vtxPos = vtxPos + corner * gridExtents; - gridExtents.y = -gridExtents.y; + vtxPos = vtxPos + corner * worldSpaceExtents; + worldSpaceExtents.y = -worldSpaceExtents.y; float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); outV.position = float4(ndcVtxPos, 0.0f, 1.0f); @@ -670,7 +667,7 @@ PSInput main(uint vertexID : SV_VertexID) outV.setGridDTMScreenSpaceCellWidth(gridCellWidth * globals.screenToWorldRatio); outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); outV.setGridDTMScreenSpaceTopLeft(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, topLeft)); - outV.setGridDTMScreenSpaceGridExtents(_static_cast(gridExtents) * globals.screenToWorldRatio); + outV.setGridDTMScreenSpaceGridExtents(_static_cast(worldSpaceExtents) * globals.screenToWorldRatio); outV.setImageUV(corner); outV.setGridDTMOutlineStipplePatternLengthReciprocal(reciprocalOutlineStipplePatternLength); } From 52d5670455047142c2865a4341398071a439bdfc Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 29 May 2025 16:29:11 +0200 
Subject: [PATCH 109/129] Added test polyline draw --- 62_CAD/main.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 3336f3609..27589f1d2 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3616,12 +3616,34 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu assert(heightMapExtent.width > 0 && heightMapExtent.height > 0); float64_t2 worldSpaceExtents; + const float64_t2 topLeft = { -400.0f, 400.0f }; worldSpaceExtents.x = (heightMapExtent.width - 1) * HeightMapCellWidth; worldSpaceExtents.y = (heightMapExtent.height - 1) * HeightMapCellWidth; const uint64_t heightMapTextureID = 0ull; if (!drawResourcesFiller.ensureStaticImageAvailability({ heightMapTextureID, gridDTMHeightMap }, intendedNextSubmit)) m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); - drawResourcesFiller.drawGridDTM({ -400.0f, 400.0f }, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); + drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); + + // draw test polyline + { + LineStyleInfo style = {}; + style.screenSpaceLineWidth = 0.0f; + style.worldSpaceLineWidth = 15.0f; + style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.5f); + + CPolyline polyline; + { + std::vector linePoints; + linePoints.push_back(topLeft); + linePoints.push_back(topLeft + float64_t2(worldSpaceExtents.x, 0.0)); + linePoints.push_back(topLeft + float64_t2(worldSpaceExtents.x, -worldSpaceExtents.y)); + linePoints.push_back(topLeft + float64_t2(0.0, -worldSpaceExtents.y)); + linePoints.push_back(topLeft); + polyline.addLinePoints(linePoints); + } + + drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + } } } From f72a308feefebfd47e22fea756ed8a50dd78e30d Mon Sep 17 00:00:00 2001 From: Przemek Date: Fri, 30 May 2025 16:36:19 +0200 Subject: [PATCH 110/129] Fixed 
vertex shader so it now compiles with emulated float --- .../main_pipeline/fragment_shader.hlsl | 7 --- .../shaders/main_pipeline/vertex_shader.hlsl | 56 ++++++++++++++++--- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 45f64aac6..eacc4ae64 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -461,7 +461,6 @@ float4 fragMain(PSInput input) : SV_TARGET float2 topLeft = input.getGridDTMScreenSpaceTopLeft(); float2 gridExtents = input.getGridDTMScreenSpaceGridExtents(); float cellWidth = input.getGridDTMScreenSpaceCellWidth(); - float2 uv = input.getImageUV(); float2 gridSpacePos = uv * gridExtents; @@ -513,15 +512,9 @@ float4 fragMain(PSInput input) : SV_TARGET v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); } - if (triangleA) - printf("v0 = { %f, %f }, v1 = { %f, %f }, v2 = { %f, %f }", v[0].x, v[0].y, v[1].x, v[1].y, v[2].x, v[2].y); - bool isTriangleInvalid = isnan(v[0].z) || isnan(v[1].z) || isnan(v[2].z); bool isCellPartiallyInvalid = isnan(cellHeights.x) || isnan(cellHeights.y) || isnan(cellHeights.z) || isnan(cellHeights.w); - if (!isTriangleInvalid && isCellPartiallyInvalid) - printf("asdf"); - if (isTriangleInvalid) discard; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index cdeea3569..f01f1ca4b 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -654,22 +654,64 @@ PSInput main(uint vertexID : SV_VertexID) float reciprocalOutlineStipplePatternLength = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + 
sizeof(float), 8u); const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - worldSpaceExtents.y = -worldSpaceExtents.y; + worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y); pfloat64_t2 vtxPos = topLeft; - vtxPos = vtxPos + corner * worldSpaceExtents; - worldSpaceExtents.y = -worldSpaceExtents.y; - - float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); - outV.position = float4(ndcVtxPos, 0.0f, 1.0f); + vtxPos.x = vtxPos.x + worldSpaceExtents.x * corner.x; + vtxPos.y = vtxPos.y + worldSpaceExtents.y * corner.y; + worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y); outV.setGridDTMHeightTextureID(textureID); outV.setGridDTMScreenSpaceCellWidth(gridCellWidth * globals.screenToWorldRatio); outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); outV.setGridDTMScreenSpaceTopLeft(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, topLeft)); outV.setGridDTMScreenSpaceGridExtents(_static_cast(worldSpaceExtents) * globals.screenToWorldRatio); - outV.setImageUV(corner); outV.setGridDTMOutlineStipplePatternLengthReciprocal(reciprocalOutlineStipplePatternLength); + + // TODO: finish implementing grid dilation + // TODO: calculate actual thicknessOfTheThickestLine + /*float thicknessOfTheThickestLine = 20.0f; + + static const float SquareRootOfTwo = 1.4142135f; + const pfloat64_t dilationFactor = SquareRootOfTwo * thicknessOfTheThickestLine; + pfloat64_t2 dilationVector = pfloat64_t2(dilationFactor, dilationFactor); + + if (corner.x == 0.0f && corner.y == 0.0f) + { + dilationVector.x = -dilationVector.x; + } + else if (corner.x == 0.0f && corner.y == 1.0f) + { + dilationVector.x = -dilationVector.x; + dilationVector.y = -dilationVector.y; + } + else if (corner.x == 1.0f && corner.y == 1.0f) + { + dilationVector.y = -dilationVector.y; + } + + const pfloat64_t dilationFactorTimesTwo = dilationFactor * 
2.0f; + const pfloat64_t2 dilatedGridExtents = worldSpaceExtents + pfloat64_t2(dilationFactorTimesTwo, dilationFactorTimesTwo); + + float2 uvScale = _static_cast(worldSpaceExtents) / _static_cast(dilatedGridExtents); + float2 uvOffset = float2(-dilationFactor, -dilationFactor) / _static_cast(dilatedGridExtents); + + outV.setImageUV(corner * uvScale + uvOffset); + + pfloat64_t2 topLeftToGridCenterVector = worldSpaceExtents * 0.5; + topLeftToGridCenterVector.y = -topLeftToGridCenterVector.y; + pfloat64_t2 gridCenter = topLeft + topLeftToGridCenterVector; + + pfloat64_t2 dilatedVtxPos = vtxPos + dilationVector; + + printf("actual = { %f, %f } dialated = { %f, %f }", _static_cast(uvScale.x), _static_cast(uvScale.y), _static_cast(dilatedVtxPos.x), _static_cast(dilatedVtxPos.y)); + + float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, dilatedVtxPos)); + outV.position = float4(ndcVtxPos, 0.0f, 1.0f);*/ + + outV.setImageUV(corner); + float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); + outV.position = float4(ndcVtxPos, 0.0f, 1.0f); } else if (objType == ObjectType::STREAMED_IMAGE) { From 5d139c4fbaae84decc0ca84b8eac3b070b396b85 Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 31 May 2025 12:44:50 +0200 Subject: [PATCH 111/129] Updated media --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index a98646358..286860ce9 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit a9864635879e5a616ac400eecd8b6451b498fbf1 +Subproject commit 286860ce9510571820d5f6d7e14abdd8ac1b22be From 7762984070daa3ac424d320ca91ecd3e8f9f0892 Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 31 May 2025 12:50:45 +0200 Subject: [PATCH 112/129] Updated media --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index 286860ce9..4d9fcebb1 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 286860ce9510571820d5f6d7e14abdd8ac1b22be 
+Subproject commit 4d9fcebb12f8c52f61882054b0da9bd60b295ced From 00a1a1dd4f50e6b802b72991cd2437556fe5e65d Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sat, 31 May 2025 15:37:12 +0400 Subject: [PATCH 113/129] GRID DTM Small Fixes --- 62_CAD/DrawResourcesFiller.cpp | 2 +- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 4a63d01a8..3491218a9 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -1959,7 +1959,7 @@ bool DrawResourcesFiller::addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, ui DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::GRID_DTM) | (0 << 16)); - //drawObj.geometryAddress = 0; + drawObj.geometryAddress = geometryBufferOffset; drawObjectsToBeFilled[0u] = drawObj; return true; diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index eacc4ae64..49e5c7ac1 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -554,13 +554,13 @@ float4 fragMain(PSInput input) : SV_TARGET float2 heightDeriv = fwidth(height); float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dtmSettings.drawOutlineEnabled()) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, outlinePhaseShift)); if (dtmSettings.drawContourEnabled()) { - for (uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder + for (int i = dtmSettings.contourSettingsCount-1u; i >= 0; --i) dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], v, input.position.xy, height)); } + if (dtmSettings.drawOutlineEnabled()) + dtmColor = 
dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, outlinePhaseShift)); if (dtmSettings.drawHeightShadingEnabled()) dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); From cd802d6ad83510c467b651ca48cdba1fd55c106b Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Sat, 31 May 2025 16:08:40 +0400 Subject: [PATCH 114/129] fixed memcmp in DtmSettings == --- 62_CAD/shaders/globals.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 0ff238289..56c8b438c 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -491,7 +491,7 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) { if (lhs.contourSettingsCount != rhs.contourSettingsCount) return false; - if (!memcmp(lhs.contourSettings, rhs.contourSettings, lhs.contourSettingsCount * sizeof(DTMContourSettings))) + if (memcmp(lhs.contourSettings, rhs.contourSettings, lhs.contourSettingsCount * sizeof(DTMContourSettings))) return false; } @@ -507,9 +507,9 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) return false; - if(!memcmp(lhs.heightShadingSettings.heightColorMapHeights, rhs.heightShadingSettings.heightColorMapHeights, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float))) + if(memcmp(lhs.heightShadingSettings.heightColorMapHeights, rhs.heightShadingSettings.heightColorMapHeights, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float))) return false; - if(!memcmp(lhs.heightShadingSettings.heightColorMapColors, rhs.heightShadingSettings.heightColorMapColors, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float32_t4))) + if(memcmp(lhs.heightShadingSettings.heightColorMapColors, rhs.heightShadingSettings.heightColorMapColors, 
lhs.heightShadingSettings.heightColorEntryCount * sizeof(float32_t4))) return false; } From 6340d2f0ca983463721c61f2db35630ac7aa5248 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 2 Jun 2025 11:32:42 +0400 Subject: [PATCH 115/129] logging in DrawResourcesFiller --- 62_CAD/DrawResourcesFiller.cpp | 165 +++++++++++++++++++++++---------- 62_CAD/DrawResourcesFiller.h | 5 +- 2 files changed, 120 insertions(+), 50 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 3491218a9..eaa8eccd2 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -3,9 +3,10 @@ DrawResourcesFiller::DrawResourcesFiller() {} -DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue) : - m_utilities(utils), - m_copyQueue(copyQueue) +DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : + m_utilities(std::move(utils)), + m_copyQueue(copyQueue), + m_logger(std::move(logger)) { imagesCache = std::unique_ptr(new ImagesCache(ImagesBindingArraySize)); } @@ -56,7 +57,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, if (memoryTypeIdx == ~0u) { - // TODO: Log, no device local memory found?! 
weird + m_logger.log("allocateResourcesBuffer: no device local memory type found.", nbl::system::ILogger::ELL_ERROR); assert(false); } @@ -76,7 +77,7 @@ void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, } else { - // LOG: Allocation failure to allocate memory arena for images + m_logger.log("failure to allocate memory arena for images", nbl::system::ILogger::ELL_ERROR); assert(false); } } @@ -88,8 +89,12 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui // TODO: Make this function failable and report insufficient memory asset::E_FORMAT msdfFormat = MSDFTextureFormat; asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; - assert(maxMSDFs <= logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers); - + if (maxMSDFs > logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers) + { + m_logger.log("requested maxMSDFs is greater than maxImageArrayLayers. lowering the limit...", nbl::system::ILogger::ELL_WARNING); + maxMSDFs = logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers; + } + IPhysicalDevice::SImageFormatPromotionRequest promotionRequest = {}; promotionRequest.originalFormat = msdfFormat; promotionRequest.usages = {}; @@ -176,7 +181,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedS uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); if (mainObjectIdx == InvalidMainObjectIdx) { - // TODO: assert or log error here + m_logger.log("drawPolyline: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); assert(false); return; } @@ -227,6 +232,12 @@ void DrawResourcesFiller::drawTriangleMesh( drawCallData.isDTMRendering = true; uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawTriangleMesh: acquireActiveMainObjectIndex returned invalid index", 
nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } drawCallData.dtm.triangleMeshMainObjectIndex = mainObjectIdx; ICPUBuffer::SCreationParams geometryBuffParams; @@ -297,7 +308,9 @@ void DrawResourcesFiller::drawHatch( textureIdx = getMSDFIndexFromInputInfo(msdfInfo, intendedNextSubmit); if (textureIdx == InvalidTextureIndex) textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), intendedNextSubmit); - _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIndex); // probably getHatchFillPatternMSDF returned nullptr + + if (textureIdx == InvalidTextureIndex) + m_logger.log("drawHatch: textureIdx returned invalid index", nbl::system::ILogger::ELL_ERROR); } LineStyleInfo lineStyle = {}; @@ -308,6 +321,13 @@ void DrawResourcesFiller::drawHatch( beginMainObject(MainObjectType::HATCH); uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawHatch: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject. You can think of it as a Cage. 
while (currentObjectInSection < hatch.getHatchBoxCount()) { @@ -340,8 +360,13 @@ void DrawResourcesFiller::drawFontGlyph( textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), intendedNextSubmit); uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); - assert(mainObjIdx != InvalidMainObjectIdx); - + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("drawFontGlyph: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + if (textureIdx != InvalidTextureIndex) { GlyphInfo glyphInfo = GlyphInfo(topLeft, dirU, aspectRatio, textureIdx, minUV); @@ -349,13 +374,17 @@ void DrawResourcesFiller::drawFontGlyph( { // single font glyph couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addFontGlyph_Internal(glyphInfo, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single GlyphInfo + const bool success = addFontGlyph_Internal(glyphInfo, mainObjIdx); + if (!success) + { + m_logger.log("addFontGlyph_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } } } else { - // TODO: Log, probably getGlyphMSDF(face,glyphIdx) returned nullptr ICPUImage ptr + m_logger.log("drawFontGlyph: textureIdx is invalid.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); } } @@ -396,7 +425,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s } else { - // TODO[LOG]: ? 
found static image has empty cpu image, shouldn't happen + m_logger.log("found static image has empty cpu image, shouldn't happen", nbl::system::ILogger::ELL_ERROR); } } @@ -443,7 +472,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s { // All attempts to try create the GPU image and its corresponding view have failed. // Most likely cause: insufficient GPU memory or unsupported image parameters. - // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. + m_logger.log("ensureStaticImageAvailability failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) @@ -469,7 +498,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s } else { - // TODO: log here, index allocation failed. + m_logger.log("ensureStaticImageAvailability failed index allocation. shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); cachedImageRecord->arrayIndex = InvalidTextureIndex; } } @@ -515,8 +544,6 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( ImageType georeferenceImageType; determineGeoreferencedImageCreationParams(imageCreationParams, georeferenceImageType, params); - assert(georeferenceImageType != ImageType::STATIC); - // imageParams = cpuImage->getCreationParameters(); imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; // promote format because RGB8 and friends don't actually exist in HW @@ -556,12 +583,12 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( } else { - // TODO[LOG] + m_logger.log("Cached georeferenced image has invalid gpu image.", nbl::system::ILogger::ELL_ERROR); } } else { - // TODO[LOG] + m_logger.log("Cached georeferenced image has invalid gpu image view.", nbl::system::ILogger::ELL_ERROR); } } @@ -592,7 +619,8 @@ bool 
DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( { // All attempts to try create the GPU image and its corresponding view have failed. // Most likely cause: insufficient GPU memory or unsupported image parameters. - // TODO: Log a warning or error here � `addStaticImage2D` failed, likely due to low VRAM. + + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) @@ -618,7 +646,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( } else { - // TODO: log here, index allocation failed. + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed index allocation. shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); cachedImageRecord->arrayIndex = InvalidTextureIndex; } } @@ -664,14 +692,23 @@ void DrawResourcesFiller::drawGridDTM( beginMainObject(MainObjectType::GRID_DTM); uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); - assert(mainObjectIdx != InvalidMainObjectIdx); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawGridDTM: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } if (!addGridDTM_Internal(gridDTMInfo, mainObjectIdx)) { // single grid DTM couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); - bool success = addGridDTM_Internal(gridDTMInfo, mainObjectIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single GridDTMInfo + const bool success = addGridDTM_Internal(gridDTMInfo, mainObjectIdx); + if (!success) + { + m_logger.log("addGridDTM_Internal 
failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } } endMainObject(); @@ -682,6 +719,12 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin beginMainObject(MainObjectType::STATIC_IMAGE); uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("addImageObject: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } ImageObjectInfo info = {}; info.topLeft = obb.topLeft; @@ -692,8 +735,12 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin { // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addImageObject_Internal(info, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object + const bool success = addImageObject_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addImageObject_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } } endMainObject(); @@ -704,6 +751,12 @@ void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const Georefer beginMainObject(MainObjectType::STREAMED_IMAGE); uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("addGeoreferencedImage: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } GeoreferencedImageInfo info = {}; info.topLeft = params.worldspaceOBB.topLeft; @@ -714,8 +767,12 @@ void 
DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const Georefer { // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single GeoreferencedImageInfo + const bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addGeoreferencedImageInfo_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } } endMainObject(); @@ -806,7 +863,7 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit if (!successCreateNewImage) { - // TODO: Log + m_logger.log("Couldn't create new gpu image in pushAllUploads: cache and replay mode.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); success = false; } @@ -954,7 +1011,12 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub { copiedResourcesSize = 0ull; - assert(resourcesCollection.calculateTotalConsumption() <= resourcesGPUBuffer->getSize()); + if (resourcesCollection.calculateTotalConsumption() > resourcesGPUBuffer->getSize()) + { + m_logger.log("some bug has caused the resourcesCollection to consume more memory than available in resourcesGPUBuffer without overflow submit", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool { @@ -963,7 +1025,7 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { - // TODO: LOG ERROR, this shouldn't happen with correct auto-submission mechanism + 
m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `copyCPUFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); assert(false); return false; } @@ -985,7 +1047,7 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { - // TODO: LOG ERROR, this shouldn't happen with correct auto-submission mechanism + m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `addComputeReservedFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); assert(false); return false; } @@ -1127,7 +1189,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } else { - // TODO: Log no valid command buffer to record into + m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `addComputeReservedFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); return false; } } @@ -1244,7 +1306,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN imageRecord.state = ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; else { - // TODO: LOG + m_logger.log("Failed `updateImageViaStagingBuffer` in pushStaticImagesUploads.", nbl::system::ILogger::ELL_ERROR); } } @@ -1292,7 +1354,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN if (!success) { - // TODO: Log + m_logger.log("Failure in `pushStaticImagesUploads`.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); } return success; @@ -1416,7 +1478,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende if (!success) { - // TODO: Log + m_logger.log("Failure in `pushStreamedImagesUploads`.", nbl::system::ILogger::ELL_ERROR); 
_NBL_DEBUG_BREAK_IF(true); } return success; @@ -2055,7 +2117,8 @@ void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const Cach { if (evicted.arrayIndex == InvalidTextureIndex) { - _NBL_DEBUG_BREAK_IF(true); // shouldn't happen under normal circumstances, TODO: LOG warning + m_logger.log("evictImage_SubmitIfNeeded: `evicted.arrayIndex == InvalidTextureIndex` is true, shouldn't happen under normal circumstances.", nbl::system::ILogger::ELL_WARNING); + _NBL_DEBUG_BREAK_IF(true); return; } // Later used to release the image's memory range. @@ -2148,7 +2211,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc else { // irrecoverable error if simple image creation fails. - // TODO[LOG]: that's rare, image view creation failed. + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: gpuImageView creation failed, that's rare and irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); } @@ -2158,7 +2221,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc else { // irrecoverable error if simple bindImageMemory fails. - // TODO: LOG + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: bindImageMemory failed, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); break; } @@ -2171,16 +2234,14 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc } else { - // irrecoverable error if memory requirements of the image don't match our preallocated devicememory - // TODO: LOG + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: memory requirements of the gpu image doesn't match our preallocated device memory, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); break; } } else { - // irrecoverable error if simple image creation fails. 
- // TODO: LOG + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: gpuImage creation failed, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); break; } @@ -2195,8 +2256,8 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc // We give up, it's really nothing we can do, no image to evict (alreadyBlockedForDeferredFrees==1) and no more memory to free up (alreadyBlockedForDeferredFrees). // We probably have evicted almost every other texture except the one we just allocated an index for. // This is most likely due to current image memory requirement being greater than the whole memory allocated for all images + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: failed allocating an image, there is nothing more from mcache to evict, the current memory requirement is simply greater than the whole memory allocated for all images.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); - // TODO[LOG] break; } @@ -2282,12 +2343,18 @@ uint32_t DrawResourcesFiller::getMSDFIndexFromInputInfo(const MSDFInputInfo& msd uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { if (!cpuImage) - return InvalidTextureIndex; // TODO: Log + { + m_logger.log("addMSDFTexture: cpuImage is nullptr.", nbl::system::ILogger::ELL_ERROR); + return InvalidTextureIndex; + } const auto cpuImageSize = cpuImage->getMipSize(0); const bool sizeMatch = cpuImageSize.x == getMSDFResolution().x && cpuImageSize.y == getMSDFResolution().y && cpuImageSize.z == 1u; if (!sizeMatch) - return InvalidTextureIndex; // TODO: Log + { + m_logger.log("addMSDFTexture: cpuImage size doesn't match with msdf array image.", nbl::system::ILogger::ELL_ERROR); + return InvalidTextureIndex; + } /* * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. 
@@ -2355,7 +2422,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor } else { - // TODO: log here, assert will be called in a few lines + m_logger.log("addMSDFTexture: index allocation failed.", nbl::system::ILogger::ELL_ERROR); inserted->alloc_idx = InvalidTextureIndex; } } diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index e91ff6413..f482d8435 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -122,7 +122,7 @@ struct DrawResourcesFiller DrawResourcesFiller(); - DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue); + DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); @@ -708,6 +708,9 @@ struct DrawResourcesFiller // Flushes Current Draw Call and adds to drawCalls void flushDrawObjects(); + // Logger + nbl::system::logger_opt_smart_ptr m_logger = nullptr; + // FrameIndex used as a criteria for resource/image eviction in case of limitations uint32_t currentFrameIndex = 0u; From 9e0448c171e3db745de5ee146cd05c8ec5597781 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 3 Jun 2025 14:46:05 +0200 Subject: [PATCH 116/129] Saving work --- 62_CAD/shaders/globals.hlsl | 2 +- .../main_pipeline/fragment_shader.hlsl | 13 ++++++-- .../shaders/main_pipeline/vertex_shader.hlsl | 32 ++++++++++++------- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 0ff238289..21f33eda3 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -260,7 +260,7 @@ struct GridDTMInfo uint32_t textureID; // 4 bytes (36) float gridCellWidth; // 4 bytes (40) float outlineStipplePatternLengthReciprocal; // 4 bytes (44) - float _padding; // 4 bytes (48) + float thicknessOfTheThickestLine; // 4 bytes (48) }; static uint32_t packR11G11B10_UNORM(float32_t3 color) diff --git 
a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index eacc4ae64..0e5ca93c0 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -453,7 +453,8 @@ float4 fragMain(PSInput input) : SV_TARGET // v0-------v2b v2a-------v1 // - // calculate screen space coordinates of vertices of the current tiranlge within the grid + // calculate screen space coordinates of vertices of t + // he current tiranlge within the grid float3 v[3]; nbl::hlsl::shapes::Line outlineLineSegments[2]; float outlinePhaseShift; @@ -553,6 +554,7 @@ float4 fragMain(PSInput input) : SV_TARGET float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; float2 heightDeriv = fwidth(height); + float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); if (dtmSettings.drawOutlineEnabled()) dtmColor = dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, outlinePhaseShift)); @@ -561,12 +563,19 @@ float4 fragMain(PSInput input) : SV_TARGET for (uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], v, input.position.xy, height)); } - if (dtmSettings.drawHeightShadingEnabled()) + const bool outOfBoundsUV = uv.x < 0.0f || uv.y < 0.0f || uv.x > 1.0f || uv.y > 1.0f; + if (dtmSettings.drawHeightShadingEnabled() && !outOfBoundsUV) dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); textureColor = dtmColor.rgb; localAlpha = dtmColor.a; + /*if (outOfBoundsUV) + textureColor = float3(0.0f, 1.0f, 0.0f); + else + textureColor = float3(0.0f, 0.0f, 1.0f); + + localAlpha = 0.5f;*/ } else if (objType == ObjectType::STREAMED_IMAGE) { diff --git 
a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index f01f1ca4b..65f3eea64 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -670,33 +670,42 @@ PSInput main(uint vertexID : SV_VertexID) // TODO: finish implementing grid dilation // TODO: calculate actual thicknessOfTheThickestLine - /*float thicknessOfTheThickestLine = 20.0f; + float thicknessOfTheThickestLine = 200.0f; static const float SquareRootOfTwo = 1.4142135f; const pfloat64_t dilationFactor = SquareRootOfTwo * thicknessOfTheThickestLine; pfloat64_t2 dilationVector = pfloat64_t2(dilationFactor, dilationFactor); + const pfloat64_t dilationFactorTimesTwo = dilationFactor * 2.0f; + const pfloat64_t2 dilatedGridExtents = worldSpaceExtents + pfloat64_t2(dilationFactorTimesTwo, dilationFactorTimesTwo); + const float2 uvScale = _static_cast(worldSpaceExtents) / _static_cast(dilatedGridExtents); + float2 uvOffset = float2(dilationFactor, dilationFactor) / _static_cast(dilatedGridExtents); + uvOffset /= uvScale; + if (corner.x == 0.0f && corner.y == 0.0f) { dilationVector.x = -dilationVector.x; + uvOffset.x = -uvOffset.x; + uvOffset.y = -uvOffset.y; } else if (corner.x == 0.0f && corner.y == 1.0f) { dilationVector.x = -dilationVector.x; dilationVector.y = -dilationVector.y; + uvOffset.x = -uvOffset.x; } else if (corner.x == 1.0f && corner.y == 1.0f) { dilationVector.y = -dilationVector.y; } + else if (corner.x == 1.0f && corner.y == 0.0f) + { + uvOffset.y = -uvOffset.y; + } - const pfloat64_t dilationFactorTimesTwo = dilationFactor * 2.0f; - const pfloat64_t2 dilatedGridExtents = worldSpaceExtents + pfloat64_t2(dilationFactorTimesTwo, dilationFactorTimesTwo); - - float2 uvScale = _static_cast(worldSpaceExtents) / _static_cast(dilatedGridExtents); - float2 uvOffset = float2(-dilationFactor, -dilationFactor) / _static_cast(dilatedGridExtents); - - outV.setImageUV(corner * uvScale + uvOffset); + const 
float2 uv = corner + uvOffset; + outV.setImageUV(uv); + printf("uv = { %f, %f } scale = { %f, %f }", _static_cast(uv.x), _static_cast(uv.y), _static_cast(uvScale.x), _static_cast(uvScale.y)); pfloat64_t2 topLeftToGridCenterVector = worldSpaceExtents * 0.5; topLeftToGridCenterVector.y = -topLeftToGridCenterVector.y; @@ -704,14 +713,13 @@ PSInput main(uint vertexID : SV_VertexID) pfloat64_t2 dilatedVtxPos = vtxPos + dilationVector; - printf("actual = { %f, %f } dialated = { %f, %f }", _static_cast(uvScale.x), _static_cast(uvScale.y), _static_cast(dilatedVtxPos.x), _static_cast(dilatedVtxPos.y)); float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, dilatedVtxPos)); - outV.position = float4(ndcVtxPos, 0.0f, 1.0f);*/ + outV.position = float4(ndcVtxPos, 0.0f, 1.0f); - outV.setImageUV(corner); + /*outV.setImageUV(corner); float2 ndcVtxPos = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, vtxPos)); - outV.position = float4(ndcVtxPos, 0.0f, 1.0f); + outV.position = float4(ndcVtxPos, 0.0f, 1.0f);*/ } else if (objType == ObjectType::STREAMED_IMAGE) { From 27cd66fe70542eb1147fccf4cd9d4073de493924 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 3 Jun 2025 15:44:59 +0200 Subject: [PATCH 117/129] Implemented grid dilation --- 62_CAD/DrawResourcesFiller.cpp | 18 ++++++++++++++++++ 62_CAD/main.cpp | 7 ++++--- .../shaders/main_pipeline/fragment_shader.hlsl | 1 + .../shaders/main_pipeline/vertex_shader.hlsl | 9 ++++----- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index eaa8eccd2..517334ad9 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -682,6 +682,24 @@ void DrawResourcesFiller::drawGridDTM( gridDTMInfo.gridCellWidth = gridCellWidth; gridDTMInfo.textureID = getImageIndexFromID(textureID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` 
function to make sure image is in memory + // determine the thickes line + float thickestLineThickness = 0.0f; + + if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) + { + thickestLineThickness = dtmSettingsInfo.outlineStyleInfo.worldSpaceLineWidth + dtmSettingsInfo.outlineStyleInfo.screenSpaceLineWidth; + } + else if (dtmSettingsInfo.mode & E_DTM_MODE::CONTOUR) + { + for (int i = 0; i < dtmSettingsInfo.contourSettingsCount; ++i) + { + const auto& contourLineStyle = dtmSettingsInfo.contourSettings[i].lineStyleInfo; + const float contourLineThickness = contourLineStyle.worldSpaceLineWidth + contourLineStyle.screenSpaceLineWidth; + thickestLineThickness = std::max(thickestLineThickness, contourLineThickness); + } + } + gridDTMInfo.thicknessOfTheThickestLine = thickestLineThickness; + if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) { const bool isOutlineStippled = dtmSettingsInfo.outlineStyleInfo.stipplePatternSize > 0; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 27589f1d2..a3f4016d7 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -372,7 +372,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu void allocateResources() { - drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue()); + drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); size_t bufferSize = 512u * 1024u * 1024u; // 512 MB drawResourcesFiller.allocateResourcesBuffer(m_device.get(), bufferSize); @@ -3544,7 +3544,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.mode |= E_DTM_MODE::CONTOUR; dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; - dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 2.0f; dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; 
dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); @@ -3625,6 +3625,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); // draw test polyline +#if 0 { LineStyleInfo style = {}; style.screenSpaceLineWidth = 0.0f; @@ -3644,6 +3645,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); } +#endif } } @@ -3656,7 +3658,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } protected: - std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); clock_t::time_point start; diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 4a47a65a1..d4f269413 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -569,6 +569,7 @@ float4 fragMain(PSInput input) : SV_TARGET textureColor = dtmColor.rgb; localAlpha = dtmColor.a; + // test out of bounds draw /*if (outOfBoundsUV) textureColor = float3(0.0f, 1.0f, 0.0f); else diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 65f3eea64..7f669f34b 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -189,7 +189,6 @@ PSInput main(uint vertexID : SV_VertexID) uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; outV.setObjType(objType); outV.setMainObjectIdx(drawObj.mainObjIndex); - MainObject mainObj = loadMainObject(drawObj.mainObjIndex); clipProjectionData = getClipProjectionData(mainObj); @@ -652,6 +651,10 @@ PSInput main(uint vertexID : SV_VertexID) uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * 
sizeof(pfloat64_t2), 8u); float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t), 8u); float reciprocalOutlineStipplePatternLength = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + sizeof(float), 8u); + float thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + 2u * sizeof(float), 8u); + + // for testing purpose + thicknessOfTheThickestLine += 200.0f; const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y); @@ -668,10 +671,6 @@ PSInput main(uint vertexID : SV_VertexID) outV.setGridDTMScreenSpaceGridExtents(_static_cast(worldSpaceExtents) * globals.screenToWorldRatio); outV.setGridDTMOutlineStipplePatternLengthReciprocal(reciprocalOutlineStipplePatternLength); - // TODO: finish implementing grid dilation - // TODO: calculate actual thicknessOfTheThickestLine - float thicknessOfTheThickestLine = 200.0f; - static const float SquareRootOfTwo = 1.4142135f; const pfloat64_t dilationFactor = SquareRootOfTwo * thicknessOfTheThickestLine; pfloat64_t2 dilationVector = pfloat64_t2(dilationFactor, dilationFactor); From d1a4e8e8e7f21390a7d817bc0bf7f2d984ac4253 Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 7 Jun 2025 15:51:01 +0200 Subject: [PATCH 118/129] Added diagonal mode info to grid DTM height map --- 62_CAD/main.cpp | 26 +++++++++++++ 62_CAD/shaders/globals.hlsl | 28 +++++++++++++ .../main_pipeline/fragment_shader.hlsl | 39 +++++++++++++------ .../shaders/main_pipeline/vertex_shader.hlsl | 5 ++- 4 files changed, 85 insertions(+), 13 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index a3f4016d7..41d8fbfd3 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1261,6 +1261,31 @@ class ComputerAidedDesign final : public 
examples::SimpleWindowedApplication, pu } gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); + + // set diagonals of even cells to TOP_LEFT_TO_BOTTOM_RIGHT and diagonals of odd cells to BOTTOM_LEFT_TO_TOP_RIGHT + { + // assumption is that format of the grid DTM height map is *_SRGB, I don't think we need any code to ensure that + + auto* region = gridDTMHeightMap->getRegion(0, core::vectorSIMDu32(0.0f)); + auto imageExtent = region->getExtent(); + auto imagePixelSize = asset::getBytesPerPixel(gridDTMHeightMap->getCreationParameters().format).getIntegerApprox(); + float* imageData = static_cast(gridDTMHeightMap->getBuffer()->getPointer()) + region->bufferOffset; + const size_t imageByteSize = gridDTMHeightMap->getImageDataSizeInBytes(); + assert(imageByteSize % sizeof(float) == 0); + + for (int i = 0; i < imageByteSize; i += sizeof(float)) + { + const bool isCellEven = i % (2 * sizeof(float)) == 0; + E_CELL_DIAGONAL diagonal = isCellEven ? TOP_LEFT_TO_BOTTOM_RIGHT : BOTTOM_LEFT_TO_TOP_RIGHT; + + // test + diagonal = BOTTOM_LEFT_TO_TOP_RIGHT; + + setDiagonalModeBit(imageData, diagonal); + imageData++; + } + } + assert(gridDTMHeightMap); return true; @@ -3735,3 +3760,4 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }; NBL_MAIN_FUNC(ComputerAidedDesign) + diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 1397f78e6..2361de5e2 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -263,6 +263,34 @@ struct GridDTMInfo float thicknessOfTheThickestLine; // 4 bytes (48) }; +enum E_CELL_DIAGONAL : uint32_t +{ + TOP_LEFT_TO_BOTTOM_RIGHT = 0u, + BOTTOM_LEFT_TO_TOP_RIGHT = 1u, + INVALID = 2u +}; + +#ifndef __HLSL_VERSION + +// sets last bit of data to 1 or 0 depending on diagonalMode +static void setDiagonalModeBit(float* data, E_CELL_DIAGONAL diagonalMode) +{ + if (diagonalMode == E_CELL_DIAGONAL::INVALID) + return; + + uint32_t dataAsUint = reinterpret_cast(*data); + 
dataAsUint |= static_cast(diagonalMode); + *data = reinterpret_cast(dataAsUint); +} + +#endif + +// Top left corner holds diagonal mode info of a cell +static E_CELL_DIAGONAL getDiagonalModeFromCellCornerData(float cellCornerData) +{ + return (nbl::hlsl::bit_cast(cellCornerData) & 0x1u) ? BOTTOM_LEFT_TO_TOP_RIGHT : TOP_LEFT_TO_BOTTOM_RIGHT; +} + static uint32_t packR11G11B10_UNORM(float32_t3 color) { // Scale and convert to integers diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index d4f269413..10a2348a7 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -117,17 +117,8 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } -enum E_CELL_DIAGONAL -{ - TOP_LEFT_TO_BOTTOM_RIGHT, - BOTTOM_LEFT_TO_TOP_RIGHT, - INVALID -}; - E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in float4 cellHeights) { - static const E_CELL_DIAGONAL DefaultDiagonal = TOP_LEFT_TO_BOTTOM_RIGHT; - const bool4 invalidHeights = bool4( isnan(cellHeights.x), isnan(cellHeights.y), @@ -140,15 +131,36 @@ E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in float4 cellHeights) invalidHeightsCount += int(invalidHeights[i]); if (invalidHeightsCount == 0) - return DefaultDiagonal; + { + E_CELL_DIAGONAL a = getDiagonalModeFromCellCornerData(cellHeights.w); + + if (a == TOP_LEFT_TO_BOTTOM_RIGHT) + { + uint32_t asdf = nbl::hlsl::bit_cast(cellHeights.w); + printf("a %f %u", cellHeights.w, asdf); + } + else if (a == BOTTOM_LEFT_TO_TOP_RIGHT) + { + uint32_t asdf = nbl::hlsl::bit_cast(cellHeights.w); + printf("b %f %u", cellHeights.w, asdf); + } + else + { + printf("wtf"); + } + + return getDiagonalModeFromCellCornerData(cellHeights.w); + } if (invalidHeightsCount > 1) return INVALID; if (invalidHeights.x || invalidHeights.z) return TOP_LEFT_TO_BOTTOM_RIGHT; - else + else if (invalidHeights.y || invalidHeights.w) return BOTTOM_LEFT_TO_TOP_RIGHT; + + 
return INVALID; } [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] @@ -488,6 +500,11 @@ float4 fragMain(PSInput input) : SV_TARGET const E_CELL_DIAGONAL cellDiagonal = resolveGridDTMCellDiagonal(cellHeights); const bool diagonalFromTopLeftToBottomRight = cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; + /*if (!diagonalFromTopLeftToBottomRight) + printf("a"); + else + printf("b");*/ + if (cellDiagonal == E_CELL_DIAGONAL::INVALID) discard; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 7f669f34b..11c8f8e22 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -654,7 +654,8 @@ PSInput main(uint vertexID : SV_VertexID) float thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + 2u * sizeof(float), 8u); // for testing purpose - thicknessOfTheThickestLine += 200.0f; + //thicknessOfTheThickestLine += 200.0f; + thicknessOfTheThickestLine = 0.0f; const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y); @@ -704,7 +705,7 @@ PSInput main(uint vertexID : SV_VertexID) const float2 uv = corner + uvOffset; outV.setImageUV(uv); - printf("uv = { %f, %f } scale = { %f, %f }", _static_cast(uv.x), _static_cast(uv.y), _static_cast(uvScale.x), _static_cast(uvScale.y)); + /*printf("uv = { %f, %f } scale = { %f, %f }", _static_cast(uv.x), _static_cast(uv.y), _static_cast(uvScale.x), _static_cast(uvScale.y));*/ pfloat64_t2 topLeftToGridCenterVector = worldSpaceExtents * 0.5; topLeftToGridCenterVector.y = -topLeftToGridCenterVector.y; From 47dec6df8d84c2e60e0f2df813aa32eedcea4ae7 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 9 Jun 2025 12:39:09 +0400 Subject: [PATCH 119/129] IUtilities constructor to static create function --- 
05_StreamingAndBufferDeviceAddressApp/main.cpp | 4 ++-- 11_FFT/main.cpp | 2 +- 28_FFTBloom/main.cpp | 2 +- old_to_refactor/20_Megatexture/main.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index e8f7dbd33..7fa72235b 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ -117,8 +117,8 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M // `CAsyncSingleBufferSubAllocator` just allows you suballocate subranges of any `IGPUBuffer` range with deferred/latched frees. constexpr uint32_t DownstreamBufferSize = sizeof(output_t)<<23; constexpr uint32_t UpstreamBufferSize = sizeof(input_t)<<23; - - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger),DownstreamBufferSize,UpstreamBufferSize); + + m_utils = IUtilities::create(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger),DownstreamBufferSize,UpstreamBufferSize); if (!m_utils) return logFail("Failed to create Utilities!"); m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); diff --git a/11_FFT/main.cpp b/11_FFT/main.cpp index 80f5f856c..b10efbf31 100644 --- a/11_FFT/main.cpp +++ b/11_FFT/main.cpp @@ -96,7 +96,7 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ constexpr uint32_t DownstreamBufferSize = sizeof(scalar_t) << 23; constexpr uint32_t UpstreamBufferSize = sizeof(scalar_t) << 23; - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); + m_utils = IUtilities::create(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); if (!m_utils) return logFail("Failed to create Utilities!"); m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); diff --git a/28_FFTBloom/main.cpp b/28_FFTBloom/main.cpp index 
cc312c3be..fddb45586 100644 --- a/28_FFTBloom/main.cpp +++ b/28_FFTBloom/main.cpp @@ -461,7 +461,7 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app assert(m_kerImageView); // Going to need an IUtils to perform uploads/downloads - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger)); + m_utils = IUtilities::create(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger)); // Now convert uploads // Get graphics queue for image transfer diff --git a/old_to_refactor/20_Megatexture/main.cpp b/old_to_refactor/20_Megatexture/main.cpp index 35d0692af..5c309ff24 100644 --- a/old_to_refactor/20_Megatexture/main.cpp +++ b/old_to_refactor/20_Megatexture/main.cpp @@ -684,7 +684,7 @@ APP_CONSTRUCTOR(MegaTextureApp) video::IGPUBuffer::SCreationParams bufferCreationParams; bufferCreationParams.usage = asset::IBuffer::EUF_STORAGE_BUFFER_BIT; bufferCreationParams.size = sizeof(video::IGPUVirtualTexture::SPrecomputedData); - core::smart_refctd_ptr utilities = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice)); + core::smart_refctd_ptr utilities = video::IUtilities::create(core::smart_refctd_ptr(logicalDevice)); core::smart_refctd_ptr buffer = utilities->createFilledDeviceLocalBufferOnDedMem(queues[CommonAPI::InitOutput::EQT_TRANSFER_UP], std::move(bufferCreationParams), &gpuvt->getPrecomputedData()); { From 07774344371d182cbcbc716928375cca29356521 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 9 Jun 2025 14:04:11 +0400 Subject: [PATCH 120/129] allocateDrawResourcesWithinAvailableVRAM --- 62_CAD/DrawResourcesFiller.cpp | 160 ++++++++++++++++++++++++--------- 62_CAD/DrawResourcesFiller.h | 46 ++++++++-- 62_CAD/main.cpp | 5 +- 3 files changed, 157 insertions(+), 54 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 517334ad9..1d0eaaf16 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -25,66 +25,131 @@ void 
DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ suballocatedDescriptorSet = core::make_smart_refctd_ptr(std::move(descriptorSet)); } -void DrawResourcesFiller::allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size) -{ - // TODO: Make this function failable and report insufficient memory if less that getMinimumRequiredResourcesBufferSize, TODO: Have retry mechanism to allocate less mem - // TODO: Allocate buffer memory and image memory with 1 allocation, so that failure and retries are more straightforward. - size = core::alignUp(size, ResourcesMaxNaturalAlignment); - size = core::max(size, getMinimumRequiredResourcesBufferSize()); - // size = 368u; STRESS TEST - IGPUBuffer::SCreationParams geometryCreationParams = {}; - geometryCreationParams.size = size; - geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; - resourcesGPUBuffer = logicalDevice->createBuffer(std::move(geometryCreationParams)); +bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize) +{ + // single memory allocation sectioned into images+buffers (images start at offset=0) + const size_t adjustedImagesMemorySize = core::alignUp(requiredImageMemorySize, GPUStructsMaxNaturalAlignment); + const size_t adjustedBuffersMemorySize = core::max(requiredBufferMemorySize, getMinimumRequiredResourcesBufferSize()); + const size_t totalResourcesSize = adjustedImagesMemorySize + adjustedBuffersMemorySize; + + IGPUBuffer::SCreationParams resourcesBufferCreationParams = {}; + resourcesBufferCreationParams.size = adjustedBuffersMemorySize; + resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; + resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams)); 
resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto mem = logicalDevice->allocate(memReq, resourcesGPUBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs(); + const bool memoryRequirementsMatch = + (logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuBufferMemoryReqs.requiresDedicatedAllocation == false); // should not require dedicated allocation + + if (!memoryRequirementsMatch) + { + m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't biind to device local memory.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); + + uint32_t memoryTypeIdx = ~0u; - // Allocate for Images + video::IDeviceMemoryAllocator::SAllocation allocation = {}; + for (uint32_t i = 0u; i < memoryProperties.memoryTypeCount; ++i) { - const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); - uint32_t memoryTypeIdx = ~0u; - for (uint32_t i = 0u; i < memoryProperties.memoryTypeCount; ++i) + if (memoryProperties.memoryTypes[i].propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) { - if (memoryProperties.memoryTypes[i].propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + memoryTypeIdx = i; + + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = { - memoryTypeIdx = i; + .size = totalResourcesSize, + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers + .memoryTypeIndex = memoryTypeIdx, + .dedication = 
nullptr, + }; + + allocation = logicalDevice->allocate(allocationInfo); + + if (allocation.isValid()) break; - } } + } - if (memoryTypeIdx == ~0u) - { - m_logger.log("allocateResourcesBuffer: no device local memory type found.", nbl::system::ILogger::ELL_ERROR); - assert(false); - } + if (memoryTypeIdx == ~0u) + { + m_logger.log("allocateResourcesBuffer: no device local memory type found!", nbl::system::ILogger::ELL_ERROR); + return false; + } - IDeviceMemoryAllocator::SAllocateInfo allocationInfo = - { - // TODO: Get from user side. - .size = 65 * 1024 * 1024, // 70 MB - .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, - .memoryTypeIndex = memoryTypeIdx, - .dedication = nullptr, - }; - imagesMemoryArena = logicalDevice->allocate(allocationInfo); + if (!allocation.isValid()) + return false; - if (imagesMemoryArena.isValid()) - { - imagesMemorySubAllocator = core::make_smart_refctd_ptr(static_cast(allocationInfo.size)); - } - else - { - m_logger.log("failure to allocate memory arena for images", nbl::system::ILogger::ELL_ERROR); - assert(false); + imagesMemoryArena = { + .memory = allocation.memory, + .offset = allocation.offset, + }; + + buffersMemoryArena = { + .memory = allocation.memory, + .offset = core::alignUp(allocation.offset + adjustedImagesMemorySize, GPUStructsMaxNaturalAlignment), // first natural alignment after images section of the memory allocation + }; + + imagesMemorySubAllocator = core::make_smart_refctd_ptr(adjustedImagesMemorySize); + + video::ILogicalDevice::SBindBufferMemoryInfo bindBufferMemory = { + .buffer = resourcesGPUBuffer.get(), + .binding = { + .memory = buffersMemoryArena.memory.get(), + .offset = buffersMemoryArena.offset, } + }; + + if (!logicalDevice->bindBufferMemory(1, &bindBufferMemory)) + { + m_logger.log("DrawResourcesFiller::allocateDrawResources, bindBufferMemory failed.", nbl::system::ILogger::ELL_ERROR); + return false; } + return true; } -void 
DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) +bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent, uint32_t maxTries) +{ + const size_t minimumAcceptableSize = core::max(MinimumDrawResourcesMemorySize, getMinimumRequiredResourcesBufferSize()); + + size_t currentBufferSize = maxBufferMemorySize; + size_t currentImageSize = maxImageMemorySize; + const size_t totalInitialSize = currentBufferSize + currentImageSize; + + // If initial size is less than minimum acceptable then increase the buffer and image size to sum up to minimumAcceptableSize with image:buffer ratios preserved + if (totalInitialSize < minimumAcceptableSize) + { + // Preserve ratio: R = buffer / (buffer + image) + // scaleFactor = minimumAcceptableSize / totalInitialSize; + const double scaleFactor = static_cast(minimumAcceptableSize) / totalInitialSize; + currentBufferSize = static_cast(currentBufferSize * scaleFactor); + currentImageSize = minimumAcceptableSize - currentBufferSize; // ensures exact sum + } + + uint32_t numTries = 0u; + while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) + { + if (allocateDrawResources(logicalDevice, currentBufferSize, currentImageSize)) + return true; + + currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; + currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; + numTries++; + m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); + } + + m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize); + return false; +} + +bool 
DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) { // TODO: Make this function failable and report insufficient memory asset::E_FORMAT msdfFormat = MSDFTextureFormat; @@ -116,7 +181,10 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui auto image = logicalDevice->createImage(std::move(imgInfo)); auto imageMemReqs = image->getMemoryReqs(); imageMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - logicalDevice->allocate(imageMemReqs, image.get()); + const auto allocation = logicalDevice->allocate(imageMemReqs, image.get()); + + if (!allocation.isValid()) + return false; image->setObjectDebugName("MSDFs Texture Array"); @@ -134,9 +202,13 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui msdfTextureArray = logicalDevice->createImageView(std::move(imgViewInfo)); } + if (!msdfTextureArray) + return false; + msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); msdfImagesState.resize(maxMSDFs); + return true; } void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index f482d8435..981facaec 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -29,7 +29,8 @@ struct DrawResourcesFiller public: // We pack multiple data types in a single buffer, we need to makes sure each offset starts aligned to avoid mis-aligned accesses - static constexpr size_t ResourcesMaxNaturalAlignment = 8u; + static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; + static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB /// @brief general parent struct for 1.ReservedCompute and 
2.CPUGenerated Resources struct ResourceBase @@ -38,7 +39,7 @@ struct DrawResourcesFiller size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued virtual size_t getCount() const = 0; virtual size_t getStorageSize() const = 0; - virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), ResourcesMaxNaturalAlignment); } + virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), GPUStructsMaxNaturalAlignment); } }; /// @brief ResourceBase reserved for compute shader stages input/output @@ -67,11 +68,11 @@ struct DrawResourcesFiller } /// @brief increases size of general-purpose resources that hold bytes - /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= ResourcesMaxNaturalAlignment, only use this if storing raw bytes in vector + /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector /// @return pointer to start of the data to be filled, up to additional size size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) { - assert(core::isPoT(alignment) && alignment <= ResourcesMaxNaturalAlignment); + assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment); size_t offset = core::alignUp(vector.size(), alignment); vector.resize(offset + additionalSize); return offset; @@ -104,7 +105,7 @@ struct DrawResourcesFiller CPUGeneratedResource indexBuffer; // TODO: this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom data for geometries (eg. 
line points, bezier definitions, aabbs) - // Get Total memory consumption, If all ResourcesCollection get packed together with ResourcesMaxNaturalAlignment + // Get Total memory consumption, If all ResourcesCollection get packed together with GPUStructsMaxNaturalAlignment // used to decide the remaining memory and when to overflow size_t calculateTotalConsumption() const { @@ -135,12 +136,40 @@ struct DrawResourcesFiller { // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) - return core::alignUp(sizeof(LineStyle) + sizeof(LineStyle) * DTMSettings::MaxContourSettings + sizeof(DTMSettings) + sizeof(WorldClipRect) + sizeof(float64_t3x3) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, ResourcesMaxNaturalAlignment); + return core::alignUp(sizeof(LineStyle) + sizeof(LineStyle) * DTMSettings::MaxContourSettings + sizeof(DTMSettings) + sizeof(WorldClipRect) + sizeof(float64_t3x3) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, GPUStructsMaxNaturalAlignment); } - void allocateResourcesBuffer(ILogicalDevice* logicalDevice, size_t size); + /** + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. 
+ */ + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize); + + /** + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. + * + * @return true if the allocation succeeded at any iteration; false if all attempts failed. + */ + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); - void allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); + bool allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); // functions that user should set to get MSDF texture if it's not available in cache. // it's up to user to return cached or generate on the fly. 
@@ -723,6 +752,7 @@ struct DrawResourcesFiller // ResourcesCollection and packed into GPUBuffer ResourcesCollection resourcesCollection; + IDeviceMemoryAllocator::SAllocation buffersMemoryArena; nbl::core::smart_refctd_ptr resourcesGPUBuffer; size_t copiedResourcesSize; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 41d8fbfd3..7d8ccb67d 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -374,8 +374,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); - size_t bufferSize = 512u * 1024u * 1024u; // 512 MB - drawResourcesFiller.allocateResourcesBuffer(m_device.get(), bufferSize); + size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize); drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); { From 307f7f9b6e2c42737511eff831ad747cc5ba8de2 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Thu, 12 Jun 2025 13:08:32 +0400 Subject: [PATCH 121/129] un-premultiply alpha --- 62_CAD/shaders/main_pipeline/fragment_shader.hlsl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 10a2348a7..b6a1e7078 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -198,9 +198,8 @@ float4 fragMain(PSInput input) : SV_TARGET } if (dtmSettings.drawHeightShadingEnabled()) dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); - - textureColor = dtmColor.rgb; + textureColor = dtmColor.rgb / dtmColor.a; localAlpha = 
dtmColor.a; gammaUncorrect(textureColor); // want to output to SRGB without gamma correction @@ -583,7 +582,7 @@ float4 fragMain(PSInput input) : SV_TARGET if (dtmSettings.drawHeightShadingEnabled() && !outOfBoundsUV) dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); - textureColor = dtmColor.rgb; + textureColor = dtmColor.rgb / dtmColor.a; localAlpha = dtmColor.a; // test out of bounds draw From f0c2d5ef74bb8f0b9c1e0d00ff66505e75753a50 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 12 Jun 2025 16:26:03 +0200 Subject: [PATCH 122/129] Fixed DTM diagonal info flushing --- 62_CAD/DrawResourcesFiller.cpp | 12 ++++-- 62_CAD/DrawResourcesFiller.h | 6 ++- 62_CAD/Images.h | 1 + 62_CAD/main.cpp | 33 ++++++++++---- 62_CAD/shaders/globals.hlsl | 8 +++- 62_CAD/shaders/main_pipeline/common.hlsl | 1 + .../main_pipeline/fragment_shader.hlsl | 43 +++++++------------ .../shaders/main_pipeline/vertex_shader.hlsl | 3 +- 8 files changed, 61 insertions(+), 46 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 517334ad9..5ea9d3adf 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -456,7 +456,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s } // Attempt to create a GPU image and image view for this texture. 
- ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, intendedNextSubmit, std::to_string(staticImage.imageID)); + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, staticImage.imageViewFormatOverride, intendedNextSubmit, std::to_string(staticImage.imageID)); if (allocResults.isValid()) { @@ -603,7 +603,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { // Attempt to create a GPU image and image view for this texture. - ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, intendedNextSubmit, std::to_string(imageID)); + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); if (allocResults.isValid()) { @@ -2171,7 +2171,11 @@ void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const Cach } } -DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName) +DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded( + const nbl::asset::IImage::SCreationParams& imageParams, + const asset::E_FORMAT imageViewFormatOverride, + nbl::video::SIntendedSubmitInfo& intendedNextSubmit, + std::string imageDebugName) { ImageAllocateResults ret = {}; @@ -2218,7 +2222,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc IGPUImageView::SCreationParams viewParams = { .image = gpuImage, .viewType = IGPUImageView::ET_2D, - .format = gpuImage->getCreationParameters().format + .format = (imageViewFormatOverride == asset::E_FORMAT::EF_COUNT) ? 
gpuImage->getCreationParameters().format : EF_R32G32B32A32_UINT }; ret.gpuImageView = device->createImageView(std::move(viewParams)); if (ret.gpuImageView) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index f482d8435..f8e4bee67 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -560,6 +560,7 @@ struct DrawResourcesFiller * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. * * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. + * @param imageViewFormatOverride Specifies whether the image view format should differ from the image format. If set to asset::E_FORMAT::EF_COUNT, the image view uses the same format as the image. * @param intendedNextSubmit Reference to the current intended submit info. Used for synchronizing evictions. * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. * @@ -568,7 +569,10 @@ struct DrawResourcesFiller * - `allocationSize`: Size of the allocated memory region. * - `gpuImageView`: The created GPU image view (nullptr if creation failed). */ - ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string debugName = "UnnamedNablaImage"); + ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, + const asset::E_FORMAT imageViewFormatOverride, + nbl::video::SIntendedSubmitInfo& intendedNextSubmit, + std::string imageDebugName); /** * @brief Determines creation parameters for a georeferenced image based on heuristics. 
diff --git a/62_CAD/Images.h b/62_CAD/Images.h index bb7b7d3ae..a341eadd6 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -215,4 +215,5 @@ struct StaticImageInfo image_id imageID = ~0ull; core::smart_refctd_ptr cpuImage = nullptr; bool forceUpdate = false; // If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + asset::E_FORMAT imageViewFormatOverride = asset::E_FORMAT::EF_COUNT; // if asset::E_FORMAT::EF_COUNT then image view will have the same format as `cpuImage` }; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 41d8fbfd3..82f70f8e6 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1153,6 +1153,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu "../../media/color_space_test/R8G8B8A8_1.png", }; + /** + * @param formatOverride override format of an image view, use special argument asset::E_FORMAT::EF_COUNT to don't override image view format and use one retrieved from the loaded image + */ auto loadImage = [&](const std::string& imagePath) -> smart_refctd_ptr { constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); @@ -1172,6 +1175,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu case IAsset::ET_IMAGE: { auto image = smart_refctd_ptr_static_cast(asset); + auto& flags = image->getCreationParameters().flags; + // assert if asset is mutable + const_cast&>(flags) |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; const auto format = image->getCreationParameters().format; ICPUImageView::SCreationParams viewParams = { @@ -1199,7 +1205,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu return nullptr; } - const auto loadedCPUImage = cpuImgView->getCreationParameters().image; const auto loadedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); @@ 
-1262,7 +1267,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); - // set diagonals of even cells to TOP_LEFT_TO_BOTTOM_RIGHT and diagonals of odd cells to BOTTOM_LEFT_TO_TOP_RIGHT + // set diagonals of cells to TOP_LEFT_TO_BOTTOM_RIGHT or BOTTOM_LEFT_TO_TOP_RIGHT randomly { // assumption is that format of the grid DTM height map is *_SRGB, I don't think we need any code to ensure that @@ -1273,17 +1278,19 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu const size_t imageByteSize = gridDTMHeightMap->getImageDataSizeInBytes(); assert(imageByteSize % sizeof(float) == 0); + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_int_distribution dist(0, 1); + for (int i = 0; i < imageByteSize; i += sizeof(float)) { - const bool isCellEven = i % (2 * sizeof(float)) == 0; - E_CELL_DIAGONAL diagonal = isCellEven ? TOP_LEFT_TO_BOTTOM_RIGHT : BOTTOM_LEFT_TO_TOP_RIGHT; - - // test - diagonal = BOTTOM_LEFT_TO_TOP_RIGHT; + const bool isTexelEven = static_cast(dist(mt)); + E_CELL_DIAGONAL diagonal = isTexelEven ? 
TOP_LEFT_TO_BOTTOM_RIGHT : BOTTOM_LEFT_TO_TOP_RIGHT; setDiagonalModeBit(imageData, diagonal); imageData++; } + } assert(gridDTMHeightMap); @@ -3572,7 +3579,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 2.0f; dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; - dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + //dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); dtmInfo.contourSettingsCount = 2u; dtmInfo.contourSettings[0u].startHeight = 20; @@ -3645,7 +3652,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu worldSpaceExtents.x = (heightMapExtent.width - 1) * HeightMapCellWidth; worldSpaceExtents.y = (heightMapExtent.height - 1) * HeightMapCellWidth; const uint64_t heightMapTextureID = 0ull; - if (!drawResourcesFiller.ensureStaticImageAvailability({ heightMapTextureID, gridDTMHeightMap }, intendedNextSubmit)) + + StaticImageInfo heightMapStaticImageInfo = { + .imageID = heightMapTextureID, + .cpuImage = gridDTMHeightMap, + .forceUpdate = false, + .imageViewFormatOverride = asset::E_FORMAT::EF_R32G32B32A32_UINT // for now we use only R32G32B32A32_* anyway + }; + + if (!drawResourcesFiller.ensureStaticImageAvailability(heightMapStaticImageInfo, intendedNextSubmit)) m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 2361de5e2..cae5210b8 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -279,16 +279,20 @@ static void setDiagonalModeBit(float* data, E_CELL_DIAGONAL diagonalMode) return; uint32_t dataAsUint = reinterpret_cast(*data); + constexpr uint32_t 
HEIGHT_VALUE_MASK = 0xFFFFFFFEu; + dataAsUint &= HEIGHT_VALUE_MASK; dataAsUint |= static_cast(diagonalMode); *data = reinterpret_cast(dataAsUint); + + uint32_t dataAsUintDbg = reinterpret_cast(*data); } #endif // Top left corner holds diagonal mode info of a cell -static E_CELL_DIAGONAL getDiagonalModeFromCellCornerData(float cellCornerData) +static E_CELL_DIAGONAL getDiagonalModeFromCellCornerData(uint32_t cellCornerData) { - return (nbl::hlsl::bit_cast(cellCornerData) & 0x1u) ? BOTTOM_LEFT_TO_TOP_RIGHT : TOP_LEFT_TO_BOTTOM_RIGHT; + return (cellCornerData & 0x1u) ? BOTTOM_LEFT_TO_TOP_RIGHT : TOP_LEFT_TO_BOTTOM_RIGHT; } static uint32_t packR11G11B10_UNORM(float32_t3 color) diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 79dbc0bd1..69f9a8ec8 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -258,6 +258,7 @@ struct PSInput [[vk::binding(2, 0)]] SamplerState textureSampler : register(s5); [[vk::binding(3, 0)]] Texture2D textures[ImagesBindingArraySize] : register(t5); +[[vk::binding(3, 0)]] Texture2D texturesU32[ImagesBindingArraySize] : register(t5); // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] globallycoherent RWTexture2D pseudoStencil : register(u0); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 10a2348a7..63ed5e915 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -117,8 +117,10 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } -E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in float4 cellHeights) +E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in uint32_t4 cellData) { + float4 cellHeights = asfloat(cellData); + const bool4 invalidHeights = bool4( 
isnan(cellHeights.x), isnan(cellHeights.y), @@ -132,24 +134,8 @@ E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in float4 cellHeights) if (invalidHeightsCount == 0) { - E_CELL_DIAGONAL a = getDiagonalModeFromCellCornerData(cellHeights.w); - - if (a == TOP_LEFT_TO_BOTTOM_RIGHT) - { - uint32_t asdf = nbl::hlsl::bit_cast(cellHeights.w); - printf("a %f %u", cellHeights.w, asdf); - } - else if (a == BOTTOM_LEFT_TO_TOP_RIGHT) - { - uint32_t asdf = nbl::hlsl::bit_cast(cellHeights.w); - printf("b %f %u", cellHeights.w, asdf); - } - else - { - printf("wtf"); - } - - return getDiagonalModeFromCellCornerData(cellHeights.w); + E_CELL_DIAGONAL a = getDiagonalModeFromCellCornerData(cellData.w); + return getDiagonalModeFromCellCornerData(cellData.w); } if (invalidHeightsCount > 1) @@ -203,6 +189,9 @@ float4 fragMain(PSInput input) : SV_TARGET textureColor = dtmColor.rgb; localAlpha = dtmColor.a; + // because final color is premultiplied by alpha + textureColor = dtmColor.rgb / dtmColor.a; + gammaUncorrect(textureColor); // want to output to SRGB without gamma correction return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); } @@ -487,24 +476,21 @@ float4 fragMain(PSInput input) : SV_TARGET float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? 
const float InvalidHeightValue = asfloat(0x7FC00000); + uint32_t4 cellData; float4 cellHeights = float4(InvalidHeightValue, InvalidHeightValue, InvalidHeightValue, InvalidHeightValue); if (textureId != InvalidTextureIndex) { const float2 maxCellCoords = float2(round(gridExtents.x / cellWidth), round(gridExtents.y / cellWidth)); const float2 location = (cellCoords + float2(0.5f, 0.5f)) / maxCellCoords; - cellHeights = textures[NonUniformResourceIndex(textureId)].Gather(textureSampler, float2(location.x, location.y), 0); + cellData = texturesU32[NonUniformResourceIndex(textureId)].Gather(textureSampler, float2(location.x, location.y), 0); + cellHeights = asfloat(cellData); } - const E_CELL_DIAGONAL cellDiagonal = resolveGridDTMCellDiagonal(cellHeights); + const E_CELL_DIAGONAL cellDiagonal = resolveGridDTMCellDiagonal(cellData); const bool diagonalFromTopLeftToBottomRight = cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; - /*if (!diagonalFromTopLeftToBottomRight) - printf("a"); - else - printf("b");*/ - if (cellDiagonal == E_CELL_DIAGONAL::INVALID) discard; @@ -515,8 +501,6 @@ float4 fragMain(PSInput input) : SV_TARGET float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; - //printf("uv = { %f, %f } diagonalTLtoBR = %i triangleA = %i, insiceCellCoords = { %f, %f }", uv.x, uv.y, int(diagonalFromTopLeftToBottomRight), int(triangleA), insideCellCoord.x / cellWidth, insideCellCoord.y / cellWidth); - if (diagonalFromTopLeftToBottomRight) { v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w); @@ -586,6 +570,9 @@ float4 fragMain(PSInput input) : SV_TARGET textureColor = dtmColor.rgb; localAlpha = dtmColor.a; + // because final color is premultiplied by alpha + textureColor = dtmColor.rgb / dtmColor.a; + // test out of bounds draw /*if (outOfBoundsUV) textureColor = float3(0.0f, 1.0f, 0.0f); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 
11c8f8e22..1cc75c570 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -654,8 +654,7 @@ PSInput main(uint vertexID : SV_VertexID) float thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + 2u * sizeof(float), 8u); // for testing purpose - //thicknessOfTheThickestLine += 200.0f; - thicknessOfTheThickestLine = 0.0f; + thicknessOfTheThickestLine += 200.0f; const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y); From fbf674031e2f16b2ee79305094ad3a45b6051c6c Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Fri, 13 Jun 2025 14:05:09 +0400 Subject: [PATCH 123/129] Bringing Francisco's changes to DrawResourcesFiller --- 62_CAD/DrawResourcesFiller.cpp | 87 +++++++++++++++---- 62_CAD/DrawResourcesFiller.h | 39 ++++++++- .../main_pipeline/fragment_shader_debug.hlsl | 3 - 3 files changed, 107 insertions(+), 22 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index ed46600e6..b540d9257 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -369,6 +369,71 @@ void DrawResourcesFiller::drawHatch( const float32_t4& color, const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) +{ + drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit); +} + +void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit) +{ + drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const en::nabla2d::Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const en::nabla2d::HatchFillPattern fillPattern, + const float64_t3x3& transformation, + en::nabla2d::TransformationType transformationType, + 
SIntendedSubmitInfo& intendedNextSubmit) +{ + // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) + + // if backgroundColor is visible + drawFixedGeometryHatch(hatch, backgroundColor, transformation, transformationType, intendedNextSubmit); + // if foregroundColor is visible + drawFixedGeometryHatch(hatch, foregroundColor, fillPattern, transformation, transformationType, intendedNextSubmit); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + en::nabla2d::TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!activeProjections.empty()) + { + // if there is already an active custom projection, it should be considered into the transformation of the fixed geometry polyline + float64_t3x3 newTransformation = nbl::hlsl::mul(activeProjections.back(), transformation); + pushCustomProjection(newTransformation); + } + else + { + // will be multiplied by the default projection matrix from the left (in shader), no need to consider it here + pushCustomProjection(transformation); + } + drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit, transformationType); + popCustomProjection(); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const float64_t3x3& transformation, + en::nabla2d::TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + drawFixedGeometryHatch(hatch, color, HatchFillPattern::SOLID_FILL, transformation, transformationType, intendedNextSubmit); +} + +void DrawResourcesFiller::drawHatch_impl( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit, + 
en::nabla2d::TransformationType transformationType) { if (color.a == 0.0f) // not visible return; @@ -380,26 +445,17 @@ void DrawResourcesFiller::drawHatch( textureIdx = getMSDFIndexFromInputInfo(msdfInfo, intendedNextSubmit); if (textureIdx == InvalidTextureIndex) textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), intendedNextSubmit); - - if (textureIdx == InvalidTextureIndex) - m_logger.log("drawHatch: textureIdx returned invalid index", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIndex); // probably getHatchFillPatternMSDF returned nullptr } LineStyleInfo lineStyle = {}; lineStyle.color = color; lineStyle.screenSpaceLineWidth = nbl::hlsl::bit_cast(textureIdx); - + setActiveLineStyle(lineStyle); - beginMainObject(MainObjectType::HATCH); - - uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); - if (mainObjectIdx == InvalidMainObjectIdx) - { - m_logger.log("drawHatch: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); - assert(false); - return; - } + beginMainObject(MainObjectType::HATCH, transformationType); + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject. You can think of it as a Cage. 
while (currentObjectInSection < hatch.getHatchBoxCount()) { @@ -411,11 +467,6 @@ void DrawResourcesFiller::drawHatch( endMainObject(); } -void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit) -{ - drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); -} - void DrawResourcesFiller::drawFontGlyph( nbl::ext::TextRendering::FontFace* fontFace, uint32_t glyphIdx, diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index dd24ea2e9..1babd7d7a 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -180,7 +180,7 @@ struct DrawResourcesFiller // Must be called at the end of each frame. // right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources. - // Registers the semaphore/value that will signal completion of this frame’s draw, + // Registers the semaphore/value that will signal completion of this frame’s draw, // This allows future frames to safely deallocate or evict resources used in the current frame by waiting on this signal before reuse or destruction. // `drawSubmitWaitValue` should reference the wait value of the draw submission finishing this frame using the `intendedNextSubmit`; void markFrameUsageComplete(uint64_t drawSubmitWaitValue); @@ -227,6 +227,33 @@ struct DrawResourcesFiller const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit); + //! Convenience function for fixed-geometry Hatch with MSDF Pattern and a solid background + void drawFixedGeometryHatch( + const en::nabla2d::Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const en::nabla2d::HatchFillPattern fillPattern, + const float64_t3x3& transformation, + en::nabla2d::TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); + + // ! 
Fixed-geometry Hatch with MSDF Pattern + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + en::nabla2d::TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); + + // ! Solid Fill Fixed-geometry Hatch + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const float64_t3x3& transformation, + en::nabla2d::TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); + /// Used by SingleLineText, Issue drawing a font glyph /// WARNING: make sure this function is called within begin/endMainObject scope void drawFontGlyph( @@ -616,6 +643,16 @@ struct DrawResourcesFiller */ void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams); + /** + * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter + */ + void drawHatch_impl( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit, + en::nabla2d::TransformationType transformationType = en::nabla2d::TransformationType::TT_NORMAL); + void resetMainObjects() { resourcesCollection.mainObjects.vector.clear(); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl index 7dba46dd0..2955d22fe 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl @@ -1,9 +1,6 @@ struct PSInputDebug { float4 position : SV_Position; - [[vk::location(0)]] float4 color : COLOR; - [[vk::location(1)]] nointerpolation float4 start_end : COLOR1; - [[vk::location(2)]] nointerpolation uint3 lineWidth_eccentricity_objType : COLOR2; }; [shader("pixel")] From 
3866e2dc1da9b3ac3d1a0770c2724931abd5af61 Mon Sep 17 00:00:00 2001 From: Przemek Date: Sat, 14 Jun 2025 13:14:58 +0200 Subject: [PATCH 124/129] Saving work --- 62_CAD/DrawResourcesFiller.cpp | 6 - 62_CAD/main.cpp | 2 +- 62_CAD/shaders/globals.hlsl | 14 +- 62_CAD/shaders/main_pipeline/common.hlsl | 2 - 62_CAD/shaders/main_pipeline/dtm.hlsl | 33 ++++ .../main_pipeline/fragment_shader.hlsl | 142 +++++++----------- .../shaders/main_pipeline/vertex_shader.hlsl | 4 +- 7 files changed, 101 insertions(+), 102 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index ed46600e6..3935e26d3 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -772,12 +772,6 @@ void DrawResourcesFiller::drawGridDTM( } gridDTMInfo.thicknessOfTheThickestLine = thickestLineThickness; - if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) - { - const bool isOutlineStippled = dtmSettingsInfo.outlineStyleInfo.stipplePatternSize > 0; - gridDTMInfo.outlineStipplePatternLengthReciprocal = isOutlineStippled ? 
dtmSettingsInfo.outlineStyleInfo.reciprocalStipplePatternLen : 0.0f; - } - setActiveDTMSettings(dtmSettingsInfo); beginMainObject(MainObjectType::GRID_DTM); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index cd53d402c..49f6090e7 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3580,7 +3580,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 2.0f; dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; - //dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); dtmInfo.contourSettingsCount = 2u; dtmInfo.contourSettings[0u].startHeight = 20; diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index cae5210b8..255c46d8a 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -43,6 +43,16 @@ struct PushConstants uint32_t isDTMRendering; }; +#ifdef __HLSL_VERSION +NBL_CONSTEXPR float InvalidGridDTMHeightValue = asfloat(0x7FC00000); + +bool isInvalidGridDtmHeightValue(float value) +{ + return isnan(value); +} + +#endif + struct WorldClipRect { pfloat64_t2 minClip; // min clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) @@ -259,8 +269,8 @@ struct GridDTMInfo pfloat64_t2 worldSpaceExtents; // 16 bytes (32) uint32_t textureID; // 4 bytes (36) float gridCellWidth; // 4 bytes (40) - float outlineStipplePatternLengthReciprocal; // 4 bytes (44) - float thicknessOfTheThickestLine; // 4 bytes (48) + float thicknessOfTheThickestLine; // 4 bytes (44) + float _padding; // 4 bytes (48) }; enum E_CELL_DIAGONAL : uint32_t diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 69f9a8ec8..f378c44db 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ 
-236,14 +236,12 @@ struct PSInput float2 getGridDTMScreenSpaceTopLeft() { return data2.xy; } float2 getGridDTMScreenSpaceGridExtents() { return data2.zw; } float getGridDTMScreenSpaceCellWidth() { return data3.x; } - float getGridDTMOutlineStipplePatternLengthReciprocal() { return data3.y; } float2 getGridDTMScreenSpacePosition() { return interp_data5.zw; } void setGridDTMHeightTextureID(uint textureID) { data1.z = textureID; } void setGridDTMScreenSpaceTopLeft(float2 screenSpaceTopLeft) { data2.xy = screenSpaceTopLeft; } void setGridDTMScreenSpaceGridExtents(float2 screenSpaceGridExtends) { data2.zw = screenSpaceGridExtends; } void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data3.x = screenSpaceGridWidth; } - void setGridDTMOutlineStipplePatternLengthReciprocal(float outlineStipplePatternLength) { data3.y = outlineStipplePatternLength; } void setGridDTMScreenSpacePosition(float2 screenSpacePosition) { interp_data5.zw = screenSpacePosition; } }; diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl index 839b5483e..0aced1b89 100644 --- a/62_CAD/shaders/main_pipeline/dtm.hlsl +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -417,6 +417,39 @@ float4 blendUnder(in float4 dstColor, in float4 srcColor) return dstColor; } + +E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in uint32_t4 cellData) +{ + float4 cellHeights = asfloat(cellData); + + const bool4 invalidHeights = bool4( + isInvalidGridDtmHeightValue(cellHeights.x), + isInvalidGridDtmHeightValue(cellHeights.y), + isInvalidGridDtmHeightValue(cellHeights.z), + isInvalidGridDtmHeightValue(cellHeights.w) + ); + + int invalidHeightsCount = 0; + for (int i = 0; i < 4; ++i) + invalidHeightsCount += int(invalidHeights[i]); + + if (invalidHeightsCount == 0) + { + E_CELL_DIAGONAL a = getDiagonalModeFromCellCornerData(cellData.w); + return getDiagonalModeFromCellCornerData(cellData.w); + } + + if (invalidHeightsCount > 1) + return INVALID; + + if (invalidHeights.x || 
invalidHeights.z) + return TOP_LEFT_TO_BOTTOM_RIGHT; + else if (invalidHeights.y || invalidHeights.w) + return BOTTOM_LEFT_TO_TOP_RIGHT; + + return INVALID; +} + } #endif \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 3bb6d0dad..aca52e937 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -117,38 +117,6 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } -E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in uint32_t4 cellData) -{ - float4 cellHeights = asfloat(cellData); - - const bool4 invalidHeights = bool4( - isnan(cellHeights.x), - isnan(cellHeights.y), - isnan(cellHeights.z), - isnan(cellHeights.w) - ); - - int invalidHeightsCount = 0; - for (int i = 0; i < 4; ++i) - invalidHeightsCount += int(invalidHeights[i]); - - if (invalidHeightsCount == 0) - { - E_CELL_DIAGONAL a = getDiagonalModeFromCellCornerData(cellData.w); - return getDiagonalModeFromCellCornerData(cellData.w); - } - - if (invalidHeightsCount > 1) - return INVALID; - - if (invalidHeights.x || invalidHeights.z) - return TOP_LEFT_TO_BOTTOM_RIGHT; - else if (invalidHeights.y || invalidHeights.w) - return BOTTOM_LEFT_TO_TOP_RIGHT; - - return INVALID; -} - [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -439,10 +407,27 @@ float4 fragMain(PSInput input) : SV_TARGET DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + if (!dtmSettings.drawContourEnabled() && !dtmSettings.drawOutlineEnabled() && !dtmSettings.drawHeightShadingEnabled()) + discard; + float2 pos = input.getGridDTMScreenSpacePosition(); float2 uv = input.getImageUV(); const uint32_t textureId = input.getGridDTMHeightTextureID(); + float2 topLeft = input.getGridDTMScreenSpaceTopLeft(); + float2 gridExtents = 
input.getGridDTMScreenSpaceGridExtents(); + const float cellWidth = input.getGridDTMScreenSpaceCellWidth(); + + float2 gridSpacePos = uv * gridExtents; + float2 cellCoords; + { + float2 gridSpacePosDivGridCellWidth = gridSpacePos / cellWidth; + cellCoords.x = uint32_t(gridSpacePosDivGridCellWidth.x); + cellCoords.y = uint32_t(gridSpacePosDivGridCellWidth.y); + } + + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; + // grid consists of square cells and cells are divided into two triangles: // depending on mode it is // either: or: @@ -453,30 +438,17 @@ float4 fragMain(PSInput input) : SV_TARGET // v0-------v2b v2a-------v1 // - // calculate screen space coordinates of vertices of t - // he current tiranlge within the grid - float3 v[3]; - nbl::hlsl::shapes::Line outlineLineSegments[2]; - float outlinePhaseShift; + // calculate screen space coordinates of vertices of the current tiranlge within the grid + float3 currentTriangleVertices[3]; { - float2 topLeft = input.getGridDTMScreenSpaceTopLeft(); - float2 gridExtents = input.getGridDTMScreenSpaceGridExtents(); - float cellWidth = input.getGridDTMScreenSpaceCellWidth(); - - float2 gridSpacePos = uv * gridExtents; - - float2 cellCoords; - { - float2 gridSpacePosDivGridCellWidth = gridSpacePos / cellWidth; - cellCoords.x = uint32_t(gridSpacePosDivGridCellWidth.x); - cellCoords.y = uint32_t(gridSpacePosDivGridCellWidth.y); - } - float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? 
- const float InvalidHeightValue = asfloat(0x7FC00000); uint32_t4 cellData; - float4 cellHeights = float4(InvalidHeightValue, InvalidHeightValue, InvalidHeightValue, InvalidHeightValue); + // cellHeihts.x - bottom left texel + // cellHeihts.y - bottom right texel + // cellHeihts.z - top right texel + // cellHeihts.w - top left texel + float4 cellHeights = float4(InvalidGridDTMHeightValue, InvalidGridDTMHeightValue, InvalidGridDTMHeightValue, InvalidGridDTMHeightValue); if (textureId != InvalidTextureIndex) { const float2 maxCellCoords = float2(round(gridExtents.x / cellWidth), round(gridExtents.y / cellWidth)); @@ -486,8 +458,7 @@ float4 fragMain(PSInput input) : SV_TARGET cellHeights = asfloat(cellData); } - - const E_CELL_DIAGONAL cellDiagonal = resolveGridDTMCellDiagonal(cellData); + const E_CELL_DIAGONAL cellDiagonal = dtm::resolveGridDTMCellDiagonal(cellData); const bool diagonalFromTopLeftToBottomRight = cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; if (cellDiagonal == E_CELL_DIAGONAL::INVALID) @@ -498,22 +469,20 @@ float4 fragMain(PSInput input) : SV_TARGET insideCellCoord.x < insideCellCoord.y : insideCellCoord.x < cellWidth - insideCellCoord.y; - float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; - if (diagonalFromTopLeftToBottomRight) { - v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w); - v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); - v[2] = triangleA ? 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); + currentTriangleVertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w); + currentTriangleVertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); + currentTriangleVertices[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); } else { - v[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x); - v[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); - v[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); + currentTriangleVertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x); + currentTriangleVertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); + currentTriangleVertices[2] = triangleA ? 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); } - bool isTriangleInvalid = isnan(v[0].z) || isnan(v[1].z) || isnan(v[2].z); + bool isTriangleInvalid = isnan(currentTriangleVertices[0].z) || isnan(currentTriangleVertices[1].z) || isnan(currentTriangleVertices[2].z); bool isCellPartiallyInvalid = isnan(cellHeights.x) || isnan(cellHeights.y) || isnan(cellHeights.z) || isnan(cellHeights.w); if (isTriangleInvalid) @@ -522,36 +491,33 @@ float4 fragMain(PSInput input) : SV_TARGET // move from grid space to screen space [unroll] for (int i = 0; i < 3; ++i) - v[i].xy += topLeft; - - if (triangleA) - { - outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(v[2].xy, v[0].xy); - outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(v[2].xy, v[1].xy); - } - else - { - outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(v[1].xy, v[2].xy); - outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(v[0].xy, v[2].xy); - } - - // test diagonal draw - //outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(v[0].xy, v[1].xy); - //outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(v[0].xy, v[1].xy); - + currentTriangleVertices[i].xy += topLeft; float distancesToVerticalCellSides = min(insideCellCoord.x, cellWidth - insideCellCoord.x); float distancesToHorizontalCellSides = min(insideCellCoord.y, cellWidth - insideCellCoord.y); float patternCellCoord = distancesToVerticalCellSides >= distancesToHorizontalCellSides ? 
cellCoords.x : cellCoords.y; + } - float reciprocalPatternLength = input.getGridDTMOutlineStipplePatternLengthReciprocal(); - if(reciprocalPatternLength > 0.0f) - outlinePhaseShift = (cellWidth * (1.0f / globals.screenToWorldRatio) * patternCellCoord) * reciprocalPatternLength; + // find the nearest horizontal and vertical line to the fragment + nbl::hlsl::shapes::Line outlineLineSegments[2]; + { + const float halfCellWidth = cellWidth * 0.5f; + const float2 nearestLineRemainingCoords = int2((gridSpacePos + halfCellWidth) / cellWidth) * cellWidth + topLeft; + + // find the nearest horizontal line + outlineLineSegments[0].P0 = float32_t2(topLeft.x, nearestLineRemainingCoords.y); + outlineLineSegments[0].P1 = float32_t2(topLeft.x + gridExtents.x, nearestLineRemainingCoords.y); + outlineLineSegments[1].P0 = float32_t2(nearestLineRemainingCoords.x, topLeft.y); + outlineLineSegments[1].P1 = float32_t2(nearestLineRemainingCoords.x, topLeft.y + gridExtents.y); + + // test diagonal draw (to draw diagonals height or contour shading must be enabled) + outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); + outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); } - const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); - float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(currentTriangleVertices[0], currentTriangleVertices[1], currentTriangleVertices[2], input.position.xy); + float height = baryCoord.x * currentTriangleVertices[0].z + baryCoord.y * currentTriangleVertices[1].z + baryCoord.z * currentTriangleVertices[2].z; float2 heightDeriv = fwidth(height); const bool outOfBoundsUV = uv.x < 0.0f || uv.y < 0.0f || uv.x > 1.0f || uv.y > 1.0f; @@ -559,12 +525,12 @@ float4 fragMain(PSInput input) : 
SV_TARGET if (dtmSettings.drawContourEnabled()) { for (int i = dtmSettings.contourSettingsCount-1u; i >= 0; --i) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], v, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], currentTriangleVertices, input.position.xy, height)); } if (dtmSettings.drawOutlineEnabled()) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, outlinePhaseShift)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, 0.0f)); if (dtmSettings.drawHeightShadingEnabled() && !outOfBoundsUV) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, v, heightDeriv, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, currentTriangleVertices, heightDeriv, input.position.xy, height)); textureColor = dtmColor.rgb / dtmColor.a; localAlpha = dtmColor.a; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 1cc75c570..6aa43cdf6 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -650,8 +650,7 @@ PSInput main(uint vertexID : SV_VertexID) pfloat64_t2 worldSpaceExtents = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2), 8u); float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t), 8u); - float reciprocalOutlineStipplePatternLength = 
vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + sizeof(float), 8u); - float thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + 2u * sizeof(float), 8u); + float thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + sizeof(float), 8u); // for testing purpose thicknessOfTheThickestLine += 200.0f; @@ -669,7 +668,6 @@ PSInput main(uint vertexID : SV_VertexID) outV.setGridDTMScreenSpacePosition(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, vtxPos)); outV.setGridDTMScreenSpaceTopLeft(transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, topLeft)); outV.setGridDTMScreenSpaceGridExtents(_static_cast(worldSpaceExtents) * globals.screenToWorldRatio); - outV.setGridDTMOutlineStipplePatternLengthReciprocal(reciprocalOutlineStipplePatternLength); static const float SquareRootOfTwo = 1.4142135f; const pfloat64_t dilationFactor = SquareRootOfTwo * thicknessOfTheThickestLine; From 9ed92e5875006f4a9fb10598e1325a1dad89af91 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Mon, 16 Jun 2025 11:13:22 +0400 Subject: [PATCH 125/129] Fixed Geometry Fixes --- 62_CAD/DrawResourcesFiller.cpp | 71 +++++++++++++++++++++++----------- 62_CAD/DrawResourcesFiller.h | 20 ++++++++++ 2 files changed, 68 insertions(+), 23 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index b540d9257..6ecbc4771 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -230,18 +230,7 @@ void DrawResourcesFiller::drawFixedGeometryPolyline(const CPolylineBase& polylin setActiveLineStyle(lineStyleInfo); - if (!activeProjections.empty()) - { - // if there is already an active custom projection, it should be considered into 
the transformation of the fixed geometry polyline - float64_t3x3 newTransformation = nbl::hlsl::mul(activeProjections.back(), transformation); - pushCustomProjection(newTransformation); - } - else - { - // will be multiplied by the default projection matrix from the left (in shader), no need to consider it here - pushCustomProjection(transformation); - } - + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); beginMainObject(MainObjectType::POLYLINE, transformationType); drawPolyline(polyline, intendedNextSubmit); endMainObject(); @@ -403,17 +392,7 @@ void DrawResourcesFiller::drawFixedGeometryHatch( en::nabla2d::TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit) { - if (!activeProjections.empty()) - { - // if there is already an active custom projection, it should be considered into the transformation of the fixed geometry polyline - float64_t3x3 newTransformation = nbl::hlsl::mul(activeProjections.back(), transformation); - pushCustomProjection(newTransformation); - } - else - { - // will be multiplied by the default projection matrix from the left (in shader), no need to consider it here - pushCustomProjection(transformation); - } + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit, transformationType); popCustomProjection(); } @@ -1718,6 +1697,52 @@ uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtm return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers } +float64_t3x3 DrawResourcesFiller::getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const +{ + if (!activeProjections.empty()) + { + float64_t3x3 newTransformation = 
nbl::hlsl::mul(activeProjections.back(), transformation); + + if (transformationType == TransformationType::TT_NORMAL) + { + return newTransformation; + } + else if (transformationType == TransformationType::TT_FIXED_SCREENSPACE_SIZE) + { + // Extract normalized rotation columns + float64_t2 column0 = nbl::hlsl::normalize(float64_t2(newTransformation[0][0], newTransformation[1][0])); + float64_t2 column1 = nbl::hlsl::normalize(float64_t2(newTransformation[0][1], newTransformation[1][1])); + + // Extract fixed screen-space scale from the original transformation + float64_t2 fixedScale = float64_t2( + nbl::hlsl::length(float64_t2(transformation[0][0], transformation[1][0])), + nbl::hlsl::length(float64_t2(transformation[0][1], transformation[1][1]))); + + // Apply fixed scale to normalized directions + column0 *= fixedScale.x; + column1 *= fixedScale.y; + + // Compose final matrix with adjusted columns + newTransformation[0][0] = column0[0]; + newTransformation[1][0] = column0[1]; + newTransformation[0][1] = column1[0]; + newTransformation[1][1] = column1[1]; + + return newTransformation; + } + else + { + // Fallback if transformationType is unrecognized, shouldn't happen + return newTransformation; + } + } + else + { + // Within no active projection scope, return transformation directly + return transformation; + } +} + uint32_t DrawResourcesFiller::acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) { if (activeLineStyleIndex == InvalidStyleIdx) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 1babd7d7a..747a225a9 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -551,6 +551,26 @@ struct DrawResourcesFiller /// returns index to added DTMSettingsInfo, returns Invalid index if it exceeds resource limitations uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + /** + * @brief Computes the final transformation matrix 
for fixed geometry rendering, + * considering any active custom projections and the transformation type. + * + * This function handles how a given transformation should be applied depending on the + * current transformation type and the presence of any active projection matrices. + * + * - If no active projection exists, the input transformation is returned unmodified. + * + * - If an active projection exists: + * - For TT_NORMAL, the input transformation is simply multiplied by the top of the projection stack. + * - For TT_FIXED_SCREENSPACE_SIZE, the input transformation is multiplied by the top of the projection stack, + * but the resulting scale is replaced with the screen-space scale from the original input `transformation`. + * + * @param transformation The input 3x3 transformation matrix to apply. + * @param transformationType The type of transformation to apply (e.g., TT_NORMAL or TT_FIXED_SCREENSPACE_SIZE). + * + */ + float64_t3x3 getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const; + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); From c111500a1f572b082e8fbb340c71bb8955d68244 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 16 Jun 2025 15:30:42 +0200 Subject: [PATCH 126/129] Added option to draw grid without height texture provided --- 62_CAD/DrawResourcesFiller.cpp | 16 +++++++++------ 62_CAD/DrawResourcesFiller.h | 8 ++++++-- 62_CAD/main.cpp | 20 ++++++++++++++----- .../main_pipeline/fragment_shader.hlsl | 14 +++++++++++-- 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 3935e26d3..fcf271383 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ 
b/62_CAD/DrawResourcesFiller.cpp @@ -746,22 +746,23 @@ void DrawResourcesFiller::drawGridDTM( float gridCellWidth, uint64_t textureID, const DTMSettingsInfo& dtmSettingsInfo, - SIntendedSubmitInfo& intendedNextSubmit) + SIntendedSubmitInfo& intendedNextSubmit, + const bool drawGridOnly/* = false*/) { GridDTMInfo gridDTMInfo; gridDTMInfo.topLeft = topLeft; gridDTMInfo.worldSpaceExtents = worldSpaceExtents; gridDTMInfo.gridCellWidth = gridCellWidth; - gridDTMInfo.textureID = getImageIndexFromID(textureID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if(!drawGridOnly) + gridDTMInfo.textureID = getImageIndexFromID(textureID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory // determine the thickes line float thickestLineThickness = 0.0f; - if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) { thickestLineThickness = dtmSettingsInfo.outlineStyleInfo.worldSpaceLineWidth + dtmSettingsInfo.outlineStyleInfo.screenSpaceLineWidth; } - else if (dtmSettingsInfo.mode & E_DTM_MODE::CONTOUR) + else if (dtmSettingsInfo.mode & E_DTM_MODE::CONTOUR && !drawGridOnly) { for (int i = 0; i < dtmSettingsInfo.contourSettingsCount; ++i) { @@ -772,7 +773,7 @@ void DrawResourcesFiller::drawGridDTM( } gridDTMInfo.thicknessOfTheThickestLine = thickestLineThickness; - setActiveDTMSettings(dtmSettingsInfo); + setActiveDTMSettings(dtmSettingsInfo, drawGridOnly); beginMainObject(MainObjectType::GRID_DTM); uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); @@ -1001,10 +1002,13 @@ void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) activeLineStyleIndex = InvalidStyleIdx; } -void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo) +void 
DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo, const bool disableHeightRelatedDTMModes/* = false*/) { activeDTMSettings = dtmSettingsInfo; activeDTMSettingsIndex = InvalidDTMSettingsIdx; + + if (disableHeightRelatedDTMModes) + activeDTMSettings.mode &= E_DTM_MODE::OUTLINE; } void DrawResourcesFiller::beginMainObject(MainObjectType type, TransformationType transformationType) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index dd24ea2e9..d1128d556 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -243,7 +243,8 @@ struct DrawResourcesFiller float gridCellWidth, uint64_t textureID, const DTMSettingsInfo& dtmSettingsInfo, - SIntendedSubmitInfo& intendedNextSubmit); + SIntendedSubmitInfo& intendedNextSubmit, + const bool drawGridOnly = false); /** * @brief Adds a static 2D image to the draw resource set for rendering. @@ -355,7 +356,10 @@ struct DrawResourcesFiller // Setting Active Resources: void setActiveLineStyle(const LineStyleInfo& lineStyle); - void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo); + /** + * @param disableHeightRelatedDTMModes disables E_DTM_MODE::CONTOUR and E_DTOM_MODE::HEIGHT_SHADING, necessary when we want to draw a grid DTM without using a height map texture + */ + void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo, const bool disableHeightRelatedDTMModes = false); void beginMainObject(MainObjectType type, TransformationType transformationType = TransformationType::TT_NORMAL); void endMainObject(); diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 49f6090e7..3195c7964 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3654,16 +3654,26 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu worldSpaceExtents.y = (heightMapExtent.height - 1) * HeightMapCellWidth; const uint64_t heightMapTextureID = 0ull; - StaticImageInfo heightMapStaticImageInfo = { + constexpr bool DrawGridOnly = true; + + 
if(DrawGridOnly) + { + dtmInfo.mode = E_DTM_MODE::OUTLINE; + drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit, DrawGridOnly); + } + else + { + StaticImageInfo heightMapStaticImageInfo = { .imageID = heightMapTextureID, .cpuImage = gridDTMHeightMap, .forceUpdate = false, .imageViewFormatOverride = asset::E_FORMAT::EF_R32G32B32A32_UINT // for now we use only R32G32B32A32_* anyway - }; + }; - if (!drawResourcesFiller.ensureStaticImageAvailability(heightMapStaticImageInfo, intendedNextSubmit)) - m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); - drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); + if (!drawResourcesFiller.ensureStaticImageAvailability(heightMapStaticImageInfo, intendedNextSubmit)) + m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); + drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); + } // draw test polyline #if 0 diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index aca52e937..f91b2ab51 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -474,12 +474,22 @@ float4 fragMain(PSInput input) : SV_TARGET currentTriangleVertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w); currentTriangleVertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); currentTriangleVertices[2] = triangleA ? 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); + + // TODO: use cell space instead https://github.com/Devsh-Graphics-Programming/Nabla-Examples-and-Tests/pull/186#discussion_r2133699055 + //currentTriangleVertices[0] = float3(0.0f, 0.0f, cellHeights.w); + //currentTriangleVertices[1] = float3(cellWidth, cellWidth, cellHeights.y); + //currentTriangleVertices[2] = triangleA ? float3(0.0f, cellWidth, cellHeights.x) : float3(cellWidth, 0.0f, cellHeights.z); } else { currentTriangleVertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x); currentTriangleVertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); currentTriangleVertices[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); + + // TODO: use cell space instead https://github.com/Devsh-Graphics-Programming/Nabla-Examples-and-Tests/pull/186#discussion_r2133699055 + //currentTriangleVertices[0] = float3(0.0f, 0.0f + cellWidth, cellHeights.x); + //currentTriangleVertices[1] = float3(0.0f + cellWidth, 0.0f, cellHeights.z); + //currentTriangleVertices[2] = triangleA ? 
float3(0.0f, 0.0f, cellHeights.w) : float3(cellWidth, cellWidth, cellHeights.y); } bool isTriangleInvalid = isnan(currentTriangleVertices[0].z) || isnan(currentTriangleVertices[1].z) || isnan(currentTriangleVertices[2].z); @@ -512,8 +522,8 @@ float4 fragMain(PSInput input) : SV_TARGET outlineLineSegments[1].P1 = float32_t2(nearestLineRemainingCoords.x, topLeft.y + gridExtents.y); // test diagonal draw (to draw diagonals height or contour shading must be enabled) - outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); - outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); + //outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); + //outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); } const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(currentTriangleVertices[0], currentTriangleVertices[1], currentTriangleVertices[2], input.position.xy); From a7cfeeb63e7891912124ab1746150c768a5bbcc9 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 16 Jun 2025 16:52:41 +0200 Subject: [PATCH 127/129] Fix --- 62_CAD/DrawResourcesFiller.cpp | 12 ++++++------ 62_CAD/DrawResourcesFiller.h | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 8362addef..cc9e513e8 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -368,12 +368,12 @@ void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, } void DrawResourcesFiller::drawFixedGeometryHatch( - const en::nabla2d::Hatch& hatch, + const Hatch& hatch, const float32_t4& foregroundColor, const float32_t4& backgroundColor, - const en::nabla2d::HatchFillPattern fillPattern, + const HatchFillPattern fillPattern, 
const float64_t3x3& transformation, - en::nabla2d::TransformationType transformationType, + TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit) { // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) @@ -389,7 +389,7 @@ void DrawResourcesFiller::drawFixedGeometryHatch( const float32_t4& color, const HatchFillPattern fillPattern, const float64_t3x3& transformation, - en::nabla2d::TransformationType transformationType, + TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit) { pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); @@ -401,7 +401,7 @@ void DrawResourcesFiller::drawFixedGeometryHatch( const Hatch& hatch, const float32_t4& color, const float64_t3x3& transformation, - en::nabla2d::TransformationType transformationType, + TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit) { drawFixedGeometryHatch(hatch, color, HatchFillPattern::SOLID_FILL, transformation, transformationType, intendedNextSubmit); @@ -412,7 +412,7 @@ void DrawResourcesFiller::drawHatch_impl( const float32_t4& color, const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit, - en::nabla2d::TransformationType transformationType) + TransformationType transformationType) { if (color.a == 0.0f) // not visible return; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 6a41849d2..1a74338e7 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -229,12 +229,12 @@ struct DrawResourcesFiller //! 
Convinience function for fixed-geometry Hatch with MSDF Pattern and a solid background void drawFixedGeometryHatch( - const en::nabla2d::Hatch& hatch, + const Hatch& hatch, const float32_t4& foregroundColor, const float32_t4& backgroundColor, - const en::nabla2d::HatchFillPattern fillPattern, + const HatchFillPattern fillPattern, const float64_t3x3& transformation, - en::nabla2d::TransformationType transformationType, + TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); // ! Fixed-geometry Hatch with MSDF Pattern @@ -243,7 +243,7 @@ struct DrawResourcesFiller const float32_t4& color, const HatchFillPattern fillPattern, const float64_t3x3& transformation, - en::nabla2d::TransformationType transformationType, + TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); // ! Solid Fill Fixed-geometry Hatch @@ -251,7 +251,7 @@ struct DrawResourcesFiller const Hatch& hatch, const float32_t4& color, const float64_t3x3& transformation, - en::nabla2d::TransformationType transformationType, + TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); /// Used by SingleLineText, Issue drawing a font glyph @@ -675,7 +675,7 @@ struct DrawResourcesFiller const float32_t4& color, const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit, - en::nabla2d::TransformationType transformationType = en::nabla2d::TransformationType::TT_NORMAL); + TransformationType transformationType = TransformationType::TT_NORMAL); void resetMainObjects() { From 9f5da5ca6dea5871981f8b014acde070b7372917 Mon Sep 17 00:00:00 2001 From: Przemek Date: Mon, 16 Jun 2025 18:17:16 +0200 Subject: [PATCH 128/129] Fixed warnings --- 62_CAD/DrawResourcesFiller.cpp | 17 +++++++++++++++-- 62_CAD/main.cpp | 2 +- 62_CAD/shaders/main_pipeline/dtm.hlsl | 12 ++++++------ .../shaders/main_pipeline/fragment_shader.hlsl | 6 +++--- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp 
b/62_CAD/DrawResourcesFiller.cpp index cc9e513e8..b40f6585c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -777,8 +777,14 @@ void DrawResourcesFiller::drawGridDTM( uint64_t textureID, const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit, - const bool drawGridOnly/* = false*/) + bool drawGridOnly/* = false*/) { + if (dtmSettingsInfo.mode == 0u) + return; + + if (dtmSettingsInfo.mode == E_DTM_MODE::OUTLINE) + drawGridOnly = true; + GridDTMInfo gridDTMInfo; gridDTMInfo.topLeft = topLeft; gridDTMInfo.worldSpaceExtents = worldSpaceExtents; @@ -2338,6 +2344,13 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc // Try creating the image and allocating memory for it: nbl::video::IGPUImage::SCreationParams params = {}; params = imageParams; + + if (imageViewFormatOverride != asset::E_FORMAT::EF_COUNT && imageViewFormatOverride != imageParams.format) + { + // TODO: figure out why this crashes the app + //params.viewFormats.set(static_cast(imageViewFormatOverride), true); + params.flags |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + } auto gpuImage = device->createImage(std::move(params)); if (gpuImage) @@ -2368,7 +2381,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc IGPUImageView::SCreationParams viewParams = { .image = gpuImage, .viewType = IGPUImageView::ET_2D, - .format = (imageViewFormatOverride == asset::E_FORMAT::EF_COUNT) ? gpuImage->getCreationParameters().format : EF_R32G32B32A32_UINT + .format = (imageViewFormatOverride == asset::E_FORMAT::EF_COUNT) ? 
gpuImage->getCreationParameters().format : imageViewFormatOverride }; ret.gpuImageView = device->createImageView(std::move(viewParams)); if (ret.gpuImageView) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 3195c7964..5cb4082bd 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3654,7 +3654,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu worldSpaceExtents.y = (heightMapExtent.height - 1) * HeightMapCellWidth; const uint64_t heightMapTextureID = 0ull; - constexpr bool DrawGridOnly = true; + constexpr bool DrawGridOnly = false; if(DrawGridOnly) { diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl index 0aced1b89..68d58c3ad 100644 --- a/62_CAD/shaders/main_pipeline/dtm.hlsl +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -118,14 +118,14 @@ float4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 v if (heightMapSize > 0) { // partially based on https://www.shadertoy.com/view/XsXSz4 by Inigo Quilez - float2 e0 = v[1] - v[0]; - float2 e1 = v[2] - v[1]; - float2 e2 = v[0] - v[2]; + float2 e0 = (v[1] - v[0]).xy; + float2 e1 = (v[2] - v[1]).xy; + float2 e2 = (v[0] - v[2]).xy; float triangleAreaSign = -sign(e0.x * e2.y - e0.y * e2.x); - float2 v0 = fragPos - v[0]; - float2 v1 = fragPos - v[1]; - float2 v2 = fragPos - v[2]; + float2 v0 = fragPos - v[0].xy; + float2 v1 = fragPos - v[1].xy; + float2 v2 = fragPos - v[2].xy; float distanceToLine0 = sqrt(dot2(v0 - e0 * dot(v0, e0) / dot(e0, e0))); float distanceToLine1 = sqrt(dot2(v1 - e1 * dot(v1, e1) / dot(e1, e1))); diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index f91b2ab51..fb8e13673 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -137,7 +137,7 @@ float4 fragMain(PSInput input) : SV_TARGET v[1] = input.getScreenSpaceVertexAttribs(1); v[2] = input.getScreenSpaceVertexAttribs(2); 
- const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(v[0], v[1], v[2], input.position.xy); + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(v[0].xy, v[1].xy, v[2].xy, input.position.xy); float height = baryCoord.x * v[0].z + baryCoord.y * v[1].z + baryCoord.z * v[2].z; float heightDeriv = fwidth(height); @@ -526,9 +526,9 @@ float4 fragMain(PSInput input) : SV_TARGET //outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); } - const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(currentTriangleVertices[0], currentTriangleVertices[1], currentTriangleVertices[2], input.position.xy); + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy, currentTriangleVertices[2].xy, input.position.xy); float height = baryCoord.x * currentTriangleVertices[0].z + baryCoord.y * currentTriangleVertices[1].z + baryCoord.z * currentTriangleVertices[2].z; - float2 heightDeriv = fwidth(height); + float heightDeriv = fwidth(height); const bool outOfBoundsUV = uv.x < 0.0f || uv.y < 0.0f || uv.x > 1.0f || uv.y > 1.0f; float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); From 8df4f585c92afc7a7541dd263b4dd13de2c6be6e Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 17 Jun 2025 21:16:50 +0200 Subject: [PATCH 129/129] Initial neighbouring cells drawing --- 62_CAD/shaders/main_pipeline/dtm.hlsl | 90 ++++++++++++ .../main_pipeline/fragment_shader.hlsl | 138 +++++++++++------- .../shaders/main_pipeline/vertex_shader.hlsl | 4 +- 3 files changed, 176 insertions(+), 56 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl index 68d58c3ad..e90f685ba 100644 --- a/62_CAD/shaders/main_pipeline/dtm.hlsl +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -450,6 +450,96 @@ E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in uint32_t4 cellData) return INVALID; } +struct GridDTMTriangle +{ + float3 
vertices[3]; +}; + +/** +* grid consists of square cells and cells are divided into two triangles: +* depending on mode it is +* either: or: +* v2a-------v1 v0-------v2b +* | A / | | \ B | +* | / | | \ | +* | / B | | A \ | +* v0-------v2b v2a-------v1 +*/ +struct GridDTMCell +{ + GridDTMTriangle triangleA; + GridDTMTriangle triangleB; +}; + +struct GridDTMHeightMapData +{ + // heihts.x - bottom left texel + // heihts.y - bottom right texel + // heihts.z - top right texel + // heihts.w - top left texel + float4 heights; + E_CELL_DIAGONAL cellDiagonal; +}; + +GridDTMHeightMapData retrieveGridDTMCellDataFromHeightMap(in float2 gridExtents, in float2 cellCoords, const float cellWidth, in Texture2D heightMap) +{ + GridDTMHeightMapData output; + + const float2 maxCellCoords = float2(round(gridExtents.x / cellWidth), round(gridExtents.y / cellWidth)); + const float2 location = (cellCoords + float2(0.5f, 0.5f)) / maxCellCoords; + uint32_t4 cellData = heightMap.Gather(textureSampler, float2(location.x, location.y), 0); + + printf("%u %u %u %u", cellData.x, cellData.y, cellData.z, cellData.w); + + output.heights = asfloat(cellData); + output.cellDiagonal = dtm::resolveGridDTMCellDiagonal(cellData); + return output; +} + +GridDTMCell calculateCellTriangles(in float2 topLeft, in float2 gridExtents, in float2 cellCoords, const float cellWidth, in Texture2D heightMap) +{ + GridDTMCell output; + + // heightData.heihts.x - bottom left texel + // heightData.heihts.y - bottom right texel + // heightData.heihts.z - top right texel + // heightData.heihts.w - top left texel + dtm::GridDTMHeightMapData heightData = dtm::retrieveGridDTMCellDataFromHeightMap(gridExtents, cellCoords, cellWidth, heightMap); + const bool diagonalFromTopLeftToBottomRight = heightData.cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; + + if (diagonalFromTopLeftToBottomRight) + { + output.triangleA.vertices[0] = 
float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + output.triangleA.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + output.triangleA.vertices[2] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + + output.triangleB.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + output.triangleB.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + output.triangleB.vertices[2] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + } + else + { + output.triangleA.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + output.triangleA.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + output.triangleA.vertices[2] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + + output.triangleB.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + output.triangleB.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + output.triangleB.vertices[2] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + } + + // move from grid space to screen space + [unroll] + for (int i = 0; i < 3; ++i) + { + output.triangleA.vertices[i].xy += topLeft; + output.triangleB.vertices[i].xy += topLeft; + } + + return output; +} + } #endif \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index fb8e13673..25564a964 
100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -117,6 +117,14 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp return color; } +bool isLineValid(in nbl::hlsl::shapes::Line l) +{ + bool isAnyLineComponentNaN = any(bool4(isnan(l.P0.x), isnan(l.P0.y), isnan(l.P1.x), isnan(l.P1.y))); + if (isAnyLineComponentNaN) + return false; + return true; +} + [[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] [shader("pixel")] float4 fragMain(PSInput input) : SV_TARGET @@ -422,8 +430,8 @@ float4 fragMain(PSInput input) : SV_TARGET float2 cellCoords; { float2 gridSpacePosDivGridCellWidth = gridSpacePos / cellWidth; - cellCoords.x = uint32_t(gridSpacePosDivGridCellWidth.x); - cellCoords.y = uint32_t(gridSpacePosDivGridCellWidth.y); + cellCoords.x = int32_t(gridSpacePosDivGridCellWidth.x); + cellCoords.y = int32_t(gridSpacePosDivGridCellWidth.y); } float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; @@ -439,31 +447,24 @@ float4 fragMain(PSInput input) : SV_TARGET // // calculate screen space coordinates of vertices of the current tiranlge within the grid - float3 currentTriangleVertices[3]; + dtm::GridDTMTriangle currentTriangle; + dtm::GridDTMCell neighbouringCells[8]; + if (dtmSettings.drawContourEnabled() || dtmSettings.drawHeightShadingEnabled()) { - float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? 
- - uint32_t4 cellData; - // cellHeihts.x - bottom left texel - // cellHeihts.y - bottom right texel - // cellHeihts.z - top right texel - // cellHeihts.w - top left texel - float4 cellHeights = float4(InvalidGridDTMHeightValue, InvalidGridDTMHeightValue, InvalidGridDTMHeightValue, InvalidGridDTMHeightValue); - if (textureId != InvalidTextureIndex) - { - const float2 maxCellCoords = float2(round(gridExtents.x / cellWidth), round(gridExtents.y / cellWidth)); - const float2 location = (cellCoords + float2(0.5f, 0.5f)) / maxCellCoords; - - cellData = texturesU32[NonUniformResourceIndex(textureId)].Gather(textureSampler, float2(location.x, location.y), 0); - cellHeights = asfloat(cellData); - } - - const E_CELL_DIAGONAL cellDiagonal = dtm::resolveGridDTMCellDiagonal(cellData); - const bool diagonalFromTopLeftToBottomRight = cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; + if (textureId == InvalidTextureIndex) + discard; - if (cellDiagonal == E_CELL_DIAGONAL::INVALID) + // heightData.heihts.x - bottom left texel + // heightData.heihts.y - bottom right texel + // heightData.heihts.z - top right texel + // heightData.heihts.w - top left texel + dtm::GridDTMHeightMapData heightData = dtm::retrieveGridDTMCellDataFromHeightMap(gridExtents, cellCoords, cellWidth, texturesU32[NonUniformResourceIndex(textureId)]); + if (heightData.cellDiagonal == E_CELL_DIAGONAL::INVALID) discard; + const bool diagonalFromTopLeftToBottomRight = heightData.cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT; + + float2 insideCellCoord = gridSpacePos - float2(cellWidth, cellWidth) * cellCoords; // TODO: use fmod instead? // my ASCII art above explains which triangle is A and which is B const bool triangleA = diagonalFromTopLeftToBottomRight ? 
insideCellCoord.x < insideCellCoord.y : @@ -471,29 +472,29 @@ float4 fragMain(PSInput input) : SV_TARGET if (diagonalFromTopLeftToBottomRight) { - currentTriangleVertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w); - currentTriangleVertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); - currentTriangleVertices[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); + currentTriangle.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + currentTriangle.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + currentTriangle.vertices[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); // TODO: use cell space instead https://github.com/Devsh-Graphics-Programming/Nabla-Examples-and-Tests/pull/186#discussion_r2133699055 - //currentTriangleVertices[0] = float3(0.0f, 0.0f, cellHeights.w); - //currentTriangleVertices[1] = float3(cellWidth, cellWidth, cellHeights.y); - //currentTriangleVertices[2] = triangleA ? float3(0.0f, cellWidth, cellHeights.x) : float3(cellWidth, 0.0f, cellHeights.z); + //currentTriangle.vertices[0] = float3(0.0f, 0.0f, heightData.heights.w); + //currentTriangle.vertices[1] = float3(cellWidth, cellWidth, heightData.heights.y); + //currentTriangle.vertices[2] = triangleA ? 
float3(0.0f, cellWidth, heightData.heights.x) : float3(cellWidth, 0.0f, heightData.heights.z); } else { - currentTriangleVertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.x); - currentTriangleVertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, cellHeights.z); - currentTriangleVertices[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, cellHeights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, cellHeights.y); + currentTriangle.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + currentTriangle.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + currentTriangle.vertices[2] = triangleA ? float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w) : float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); // TODO: use cell space instead https://github.com/Devsh-Graphics-Programming/Nabla-Examples-and-Tests/pull/186#discussion_r2133699055 - //currentTriangleVertices[0] = float3(0.0f, 0.0f + cellWidth, cellHeights.x); - //currentTriangleVertices[1] = float3(0.0f + cellWidth, 0.0f, cellHeights.z); - //currentTriangleVertices[2] = triangleA ? float3(0.0f, 0.0f, cellHeights.w) : float3(cellWidth, cellWidth, cellHeights.y); + //currentTriangle.vertices[0] = float3(0.0f, 0.0f + cellWidth, heightData.heights.x); + //currentTriangle.vertices[1] = float3(0.0f + cellWidth, 0.0f, heightData.heights.z); + //currentTriangle.vertices[2] = triangleA ? 
float3(0.0f, 0.0f, heightData.heights.w) : float3(cellWidth, cellWidth, heightData.heights.y); } - bool isTriangleInvalid = isnan(currentTriangleVertices[0].z) || isnan(currentTriangleVertices[1].z) || isnan(currentTriangleVertices[2].z); - bool isCellPartiallyInvalid = isnan(cellHeights.x) || isnan(cellHeights.y) || isnan(cellHeights.z) || isnan(cellHeights.w); + bool isTriangleInvalid = isnan(currentTriangle.vertices[0].z) || isnan(currentTriangle.vertices[1].z) || isnan(currentTriangle.vertices[2].z); + bool isCellPartiallyInvalid = isnan(heightData.heights.x) || isnan(heightData.heights.y) || isnan(heightData.heights.z) || isnan(heightData.heights.w); if (isTriangleInvalid) discard; @@ -501,46 +502,75 @@ float4 fragMain(PSInput input) : SV_TARGET // move from grid space to screen space [unroll] for (int i = 0; i < 3; ++i) - currentTriangleVertices[i].xy += topLeft; - - float distancesToVerticalCellSides = min(insideCellCoord.x, cellWidth - insideCellCoord.x); - float distancesToHorizontalCellSides = min(insideCellCoord.y, cellWidth - insideCellCoord.y); - - float patternCellCoord = distancesToVerticalCellSides >= distancesToHorizontalCellSides ? 
cellCoords.x : cellCoords.y; + currentTriangle.vertices[i].xy += topLeft; + + const float2 neighbouringCellsCellOffsets[8] = { + float2(-1.0f, -1.0f), + float2(0.0f, -1.0f), + float2(1.0f, -1.0f), + float2(-1.0f, 0.0f), + float2(-1.0f, 0.0f), + float2(-1.0f, 1.0f), + float2(0.0f, 1.0f), + float2(1.0f, 1.0f) + }; + + // construct triangles of neighbouring cells + for (int i = 0; i < 8; ++i) + { + float2 neighbouringCellCoords = cellCoords + neighbouringCellsCellOffsets[i]; + neighbouringCells[i] = dtm::calculateCellTriangles(topLeft, gridExtents, neighbouringCellCoords, cellWidth, texturesU32[NonUniformResourceIndex(textureId)]); + } } // find the nearest horizontal and vertical line to the fragment nbl::hlsl::shapes::Line outlineLineSegments[2]; { const float halfCellWidth = cellWidth * 0.5f; - const float2 nearestLineRemainingCoords = int2((gridSpacePos + halfCellWidth) / cellWidth) * cellWidth + topLeft; + const float2 horizontalBounds = float2(topLeft.y, topLeft.y + gridExtents.y); + const float2 verticalBounds = float2(topLeft.x, topLeft.x + gridExtents.x); + float2 nearestLineRemainingCoords = int2((gridSpacePos + halfCellWidth) / cellWidth) * cellWidth + topLeft; + // shift lines outside of the grid to a bound + nearestLineRemainingCoords.x = clamp(nearestLineRemainingCoords.x, verticalBounds.x, verticalBounds.y); + nearestLineRemainingCoords.y = clamp(nearestLineRemainingCoords.y, horizontalBounds.x, horizontalBounds.y); // find the nearest horizontal line - outlineLineSegments[0].P0 = float32_t2(topLeft.x, nearestLineRemainingCoords.y); - outlineLineSegments[0].P1 = float32_t2(topLeft.x + gridExtents.x, nearestLineRemainingCoords.y); - outlineLineSegments[1].P0 = float32_t2(nearestLineRemainingCoords.x, topLeft.y); - outlineLineSegments[1].P1 = float32_t2(nearestLineRemainingCoords.x, topLeft.y + gridExtents.y); + outlineLineSegments[0].P0 = float32_t2(verticalBounds.x, nearestLineRemainingCoords.y); + outlineLineSegments[0].P1 = 
float32_t2(verticalBounds.y, nearestLineRemainingCoords.y); + // find the nearest vertical line + outlineLineSegments[1].P0 = float32_t2(nearestLineRemainingCoords.x, horizontalBounds.x); + outlineLineSegments[1].P1 = float32_t2(nearestLineRemainingCoords.x, horizontalBounds.y); // test diagonal draw (to draw diagonals height or contour shading must be enabled) //outlineLineSegments[0] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); //outlineLineSegments[1] = nbl::hlsl::shapes::Line::construct(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy); } - const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(currentTriangleVertices[0].xy, currentTriangleVertices[1].xy, currentTriangleVertices[2].xy, input.position.xy); - float height = baryCoord.x * currentTriangleVertices[0].z + baryCoord.y * currentTriangleVertices[1].z + baryCoord.z * currentTriangleVertices[2].z; + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(currentTriangle.vertices[0].xy, currentTriangle.vertices[1].xy, currentTriangle.vertices[2].xy, input.position.xy); + float height = baryCoord.x * currentTriangle.vertices[0].z + baryCoord.y * currentTriangle.vertices[1].z + baryCoord.z * currentTriangle.vertices[2].z; float heightDeriv = fwidth(height); const bool outOfBoundsUV = uv.x < 0.0f || uv.y < 0.0f || uv.x > 1.0f || uv.y > 1.0f; float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dtmSettings.drawContourEnabled()) + if (dtmSettings.drawContourEnabled() && !outOfBoundsUV) { for (int i = dtmSettings.contourSettingsCount-1u; i >= 0; --i) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], currentTriangleVertices, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], currentTriangle.vertices, input.position.xy, height)); + + // draw shit form neighbouring cells + for (int i = 0; i < 8; ++i) + { 
+ for (int j = dtmSettings.contourSettingsCount - 1u; j >= 0; --j) + { + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], neighbouringCells[i].triangleA.vertices, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMContourColor(dtmSettings.contourSettings[i], neighbouringCells[i].triangleB.vertices, input.position.xy, height)); + } + } } if (dtmSettings.drawOutlineEnabled()) dtmColor = dtm::blendUnder(dtmColor, dtm::calculateGridDTMOutlineColor(dtmSettings.outlineLineStyleIdx, outlineLineSegments, input.position.xy, 0.0f)); if (dtmSettings.drawHeightShadingEnabled() && !outOfBoundsUV) - dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, currentTriangleVertices, heightDeriv, input.position.xy, height)); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, currentTriangle.vertices, heightDeriv, input.position.xy, height)); textureColor = dtmColor.rgb / dtmColor.a; localAlpha = dtmColor.a; @@ -569,11 +599,11 @@ float4 fragMain(PSInput input) : SV_TARGET } } - uint2 fragCoord = uint2(input.position.xy); if (localAlpha <= 0) discard; + uint2 fragCoord = uint2(input.position.xy); const bool colorFromTexture = objType == ObjectType::STREAMED_IMAGE || objType == ObjectType::STATIC_IMAGE || objType == ObjectType::GRID_DTM; return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 6aa43cdf6..fd327e7fd 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -652,8 +652,8 @@ PSInput main(uint vertexID : SV_VertexID) float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t), 8u); float 
thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + sizeof(float), 8u); - // for testing purpose - thicknessOfTheThickestLine += 200.0f; + // test large dilation + //thicknessOfTheThickestLine += 200.0f; const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); worldSpaceExtents.y = ieee754::flipSign(worldSpaceExtents.y);