Skip to content

Commit 0264eac

Browse files
author
devsh
committed
Finish the TLAS->BLAS tracking for device builds.
Add absolutely awesome callback to `IQueue::SSubmitInfo`
1 parent 5fc0da6 commit 0264eac

File tree

2 files changed

+85
-32
lines changed

2 files changed

+85
-32
lines changed

include/nbl/video/IQueue.h

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ class IQueue : public core::Interface, public core::Unmovable
9292
std::span<const SSemaphoreInfo> waitSemaphores = {};
9393
std::span<const SCommandBufferInfo> commandBuffers = {};
9494
std::span<const SSemaphoreInfo> signalSemaphores = {};
95+
// No guarantees are given about when it will execute, except that it will execute:
96+
// 1) after the `signalSemaphores.back()` signals
97+
// 2) in order w.r.t. all other submits on this queue
98+
// 3) after all lifetime tracking has been performed (so transient resources will already be dead!)
99+
// NOTE: This `std::function` WILL be copied!
100+
std::function<void()>* completionCallback = nullptr;
95101

96102
inline bool valid() const
97103
{
@@ -116,45 +122,34 @@ class IQueue : public core::Interface, public core::Unmovable
116122
virtual const void* getNativeHandle() const = 0;
117123

118124
// only public because MultiTimelineEventHandlerST needs to know about it
119-
class DeferredSubmitResourceDrop final
125+
class DeferredSubmitCallback final
120126
{
127+
//
128+
struct STLASBuildMetadata
129+
{
130+
core::unordered_set<IGPUTopLevelAccelerationStructure::blas_smart_ptr_t> m_BLASes;
131+
uint32_t m_buildVer;
132+
};
133+
core::unordered_map<IGPUTopLevelAccelerationStructure*,STLASBuildMetadata> m_TLASToBLASReferenceSets;
134+
//
121135
using smart_ptr = core::smart_refctd_ptr<IBackendObject>;
122-
core::smart_refctd_dynamic_array<smart_ptr> m_resources;
136+
core::smart_refctd_dynamic_array<smart_ptr> m_resources;
137+
//
138+
std::function<void()> m_callback;
123139

124140
public:
125-
inline DeferredSubmitResourceDrop(const SSubmitInfo& info)
126-
{
127-
// We could actually not hold any signal semaphore because you're expected to use the signal result somewhere else.
128-
// However it's possible to you might only wait on one from the set and then drop the rest (UB)
129-
m_resources = core::make_refctd_dynamic_array<decltype(m_resources)>(info.signalSemaphores.size()-1+info.commandBuffers.size()+info.waitSemaphores.size());
130-
auto outRes = m_resources->data();
131-
for (const auto& sema : info.waitSemaphores)
132-
*(outRes++) = smart_ptr(sema.semaphore);
133-
for (const auto& cb : info.commandBuffers)
134-
*(outRes++) = smart_ptr(cb.cmdbuf);
135-
// We don't hold the last signal semaphore, because the timeline does as an Event trigger.
136-
for (auto i=0u; i<info.signalSemaphores.size()-1; i++)
137-
*(outRes++) = smart_ptr(info.signalSemaphores[i].semaphore);
138-
}
139-
DeferredSubmitResourceDrop(const DeferredSubmitResourceDrop& other) = delete;
140-
inline DeferredSubmitResourceDrop(DeferredSubmitResourceDrop&& other) : m_resources(nullptr)
141+
DeferredSubmitCallback(const SSubmitInfo& info);
142+
DeferredSubmitCallback(const DeferredSubmitCallback& other) = delete;
143+
inline DeferredSubmitCallback(DeferredSubmitCallback&& other) : m_resources(nullptr)
141144
{
142145
this->operator=(std::move(other));
143146
}
144147

145-
DeferredSubmitResourceDrop& operator=(const DeferredSubmitResourceDrop& other) = delete;
146-
inline DeferredSubmitResourceDrop& operator=(DeferredSubmitResourceDrop&& other)
147-
{
148-
m_resources = std::move(other.m_resources);
149-
other.m_resources = nullptr;
150-
return *this;
151-
}
148+
DeferredSubmitCallback& operator=(const DeferredSubmitCallback& other) = delete;
149+
DeferredSubmitCallback& operator=(DeferredSubmitCallback&& other);
152150

153151
// always exhaustive poll, because we need to get rid of resources ASAP
154-
inline void operator()()
155-
{
156-
m_resources = nullptr;
157-
}
152+
void operator()();
158153
};
159154

160155
protected:
@@ -170,7 +165,7 @@ class IQueue : public core::Interface, public core::Unmovable
170165
virtual RESULT waitIdle_impl() const = 0;
171166

172167
// Refcounts all resources used by Pending Submits, gets occasionally cleared out
173-
std::unique_ptr<MultiTimelineEventHandlerST<DeferredSubmitResourceDrop,false>> m_submittedResources;
168+
std::unique_ptr<MultiTimelineEventHandlerST<DeferredSubmitCallback,false>> m_submittedResources;
174169
const ILogicalDevice* m_originDevice;
175170
const uint32_t m_familyIndex;
176171
const float m_priority;

src/nbl/video/IQueue.cpp

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace nbl::video
1313
IQueue::IQueue(ILogicalDevice* originDevice, const uint32_t _famIx, const core::bitflag<CREATE_FLAGS> _flags, const float _priority)
1414
: m_originDevice(originDevice), m_familyIndex(_famIx), m_priority(_priority), m_flags(_flags)
1515
{
16-
m_submittedResources = std::make_unique<MultiTimelineEventHandlerST<DeferredSubmitResourceDrop,false>>(originDevice);
16+
m_submittedResources = std::make_unique<MultiTimelineEventHandlerST<DeferredSubmitCallback,false>>(originDevice);
1717
}
1818

1919
auto IQueue::submit(const std::span<const SSubmitInfo> _submits) -> RESULT
@@ -111,7 +111,7 @@ auto IQueue::submit(const std::span<const SSubmitInfo> _submits) -> RESULT
111111
{
112112
// hold onto the semaphores and commandbuffers until the submit signals the last semaphore
113113
const auto& lastSignal = submit.signalSemaphores.back();
114-
m_submittedResources->latch({.semaphore=lastSignal.semaphore,.value=lastSignal.value},DeferredSubmitResourceDrop(submit));
114+
m_submittedResources->latch({.semaphore=lastSignal.semaphore,.value=lastSignal.value},DeferredSubmitCallback(submit));
115115
// Mark cmdbufs as done (wrongly but conservatively wrong)
116116
// We can't use `m_submittedResources` to mark them done, because the functor may run "late" in the future, after the cmdbuf has already been validly reset or resubmitted
117117
for (const auto& commandBuffer : submit.commandBuffers)
@@ -141,6 +141,64 @@ uint32_t IQueue::cullResources(const ISemaphore* sema)
141141
return m_submittedResources->poll().eventsLeft;
142142
}
143143

144+
IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info)
145+
{
146+
// We could actually not hold any signal semaphore because you're expected to use the signal result somewhere else.
147+
// However it's possible that you might only wait on one from the set and then drop the rest (UB)
148+
m_resources = core::make_refctd_dynamic_array<decltype(m_resources)>(info.signalSemaphores.size()-1+info.commandBuffers.size()+info.waitSemaphores.size());
149+
auto outRes = m_resources->data();
150+
for (const auto& sema : info.waitSemaphores)
151+
*(outRes++) = smart_ptr(sema.semaphore);
152+
for (const auto& cb : info.commandBuffers)
153+
{
154+
*(outRes++) = smart_ptr(cb.cmdbuf);
155+
// get the TLAS BLAS tracking info and assign a pending build version number
156+
for (const auto& refSet : cb.cmdbuf->m_TLASToBLASReferenceSets)
157+
{
158+
const auto tlas = refSet.first;
159+
// in theory could assert no duplicate entries, but that's obvious
160+
m_TLASToBLASReferenceSets[tlas] = { .m_BLASes = {refSet.second.begin(),refSet.second.end()}, .m_buildVer = tlas->registerNextBuildVer()};
161+
}
162+
}
163+
// We don't hold the last signal semaphore, because the timeline does as an Event trigger.
164+
for (auto i=0u; i<info.signalSemaphores.size()-1; i++)
165+
*(outRes++) = smart_ptr(info.signalSemaphores[i].semaphore);
166+
// copy the function object for the callback
167+
if (info.completionCallback)
168+
m_callback = *info.completionCallback;
169+
}
170+
171+
IQueue::DeferredSubmitCallback& IQueue::DeferredSubmitCallback::operator=(DeferredSubmitCallback&& other)
172+
{
173+
m_TLASToBLASReferenceSets = std::move(other.m_TLASToBLASReferenceSets);
174+
m_resources = std::move(other.m_resources);
175+
m_callback = std::move(other.m_callback);
176+
other.m_TLASToBLASReferenceSets = {};
177+
other.m_resources = nullptr;
178+
other.m_callback = {};
179+
return *this;
180+
}
181+
182+
// always exhaustive poll, because we need to get rid of resources ASAP
183+
void IQueue::DeferredSubmitCallback::operator()()
184+
{
185+
// first update tracking info (needs resources alive)
186+
for (const auto& refSet : m_TLASToBLASReferenceSets)
187+
{
188+
const auto tlas = refSet.first;
189+
const auto& blases = refSet.second.m_BLASes;
190+
tlas->setTrackedBLASes(blases.begin(),blases.end(),refSet.second.m_buildVer);
191+
}
192+
// then free all resources
193+
m_resources = nullptr;
194+
// then execute the callback
195+
if (m_callback)
196+
{
197+
m_callback();
198+
m_callback = {};
199+
}
200+
}
201+
144202
} // namespace nbl::video
145203

146204
#include "nbl/undef_logging_macros.h"

0 commit comments

Comments
 (0)