Skip to content

Commit 5dcc2b4

Browse files
authored
Merge pull request #1328 from Concordium/lazy-module-load
Be lazier when loading modules.
2 parents 3cb759e + e6969a0 commit 5dcc2b4

File tree

5 files changed

+166
-84
lines changed

5 files changed

+166
-84
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
- Replace `BufferedRef` with `HashedBufferedRef` in `PoolRewards`
55
`bakerPoolRewardDetails::LFMBTree` field to cache computed hashes.
66

7+
- Improvements to the loading of modules. This particularly improves the performance of
8+
`GetModuleSource` in certain cases, and can also reduce start-up time.
9+
710
## 8.0.3
811

912
- Fix a bug where, after a protocol update in consensus version 1 (P6 onwards), a node may

concordium-consensus/src/Concordium/GlobalState/Persistent/BlobStore.hs

+18
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ module Concordium.GlobalState.Persistent.BlobStore (
6464
storeUpdateRef,
6565
loadRef,
6666
DirectBlobStorable (..),
67+
DirectBlobHashable (..),
6768

6869
-- * Nullable
6970
Nullable (..),
@@ -874,6 +875,23 @@ instance {-# OVERLAPPABLE #-} (MonadBlobStore m, BlobStorable m a) => DirectBlob
874875
{-# INLINE storeUpdateDirect #-}
875876
{-# INLINE loadDirect #-}
876877

878+
-- | The @DirectBlobHashable m h a@ class defines an operation for directly loading the hash
879+
-- of a value of type @a@ from the blob store. This allows for a more efficient implementation
880+
-- than loading the full value and then computing the hash in cases where loading may be expensive.
881+
class (MonadBlobStore m) => DirectBlobHashable m h a where
882+
-- | Load the hash of a value of type @a@ from the underlying storage.
883+
--
884+
-- prop> loadHash = getHashM <=< loadDirect
885+
loadHash :: BlobRef a -> m h
886+
887+
instance
888+
{-# OVERLAPPABLE #-}
889+
(DirectBlobStorable m a, MHashableTo m h a) =>
890+
DirectBlobHashable m h a
891+
where
892+
loadHash = getHashM <=< loadDirect
893+
{-# INLINE loadHash #-}
894+
877895
instance (BlobStorable m a, BlobStorable m b) => BlobStorable m (a, b) where
878896
storeUpdate (a, b) = do
879897
(!pa, !a') <- storeUpdate a

concordium-consensus/src/Concordium/GlobalState/Persistent/BlockState/Modules.hs

+106-78
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ import Concordium.Wasm
5050
import Control.Monad.Trans
5151
import qualified Data.ByteString as BS
5252
import Data.Coerce
53-
import Data.Foldable
5453
import Data.Map.Strict (Map)
5554
import qualified Data.Map.Strict as Map
5655
import Data.Serialize
@@ -152,76 +151,85 @@ instance HashableTo Hash Module where
152151
instance (Monad m) => MHashableTo m Hash Module
153152
instance (MonadBlobStore m) => Cacheable m Module
154153

154+
-- | Load a module from the underlying storage, without recompiling the artifact.
155+
loadModuleDirect :: (MonadLogger m, MonadBlobStore m) => BlobRef Module -> m Module
156+
loadModuleDirect br = do
157+
bs <- loadRaw br
158+
let getModule = do
159+
-- Offset of the start of the module
160+
startOffset <- fromIntegral <$> bytesRead
161+
-- Header
162+
miModuleRef <- get
163+
miExposedInit <- getSafeSetOf get
164+
miExposedReceive <- getSafeMapOf get (getSafeSetOf get)
165+
-- Artifact is serialized as @InstrumentedModule v@.
166+
artVersion <- get
167+
artLen <- getWord32be
168+
-- Offset of the start of the artifact
169+
artOffset <- fromIntegral <$> bytesRead
170+
-- Skip the actual body of the artifact; we deserialize as a 'BlobPtr' instead.
171+
skip (fromIntegral artLen)
172+
-- Footer
173+
miModuleSize <- getWord64be
174+
let miModule :: PersistentInstrumentedModuleV v
175+
miModule =
176+
PIMVPtr
177+
BlobPtr
178+
{ theBlobPtr =
179+
-- Start of the blob ref
180+
theBlobRef br
181+
-- Add the size of the length field for the blob ref
182+
+ 8
183+
-- Add the offset of the artifact
184+
+ artOffset
185+
-- Subtract the starting offset
186+
- startOffset,
187+
blobPtrLen = fromIntegral artLen
188+
}
189+
moduleVInterface :: GSWasm.ModuleInterfaceA (PersistentInstrumentedModuleV v)
190+
moduleVInterface = GSWasm.ModuleInterface{..}
191+
case artVersion of
192+
V0 -> do
193+
moduleVSource <- get
194+
return $! ModuleV0 (ModuleV{..})
195+
V1 -> do
196+
moduleVSource <- get
197+
return $! ModuleV1 (ModuleV{..})
198+
case runGet getModule bs of
199+
Left e -> error (e ++ " :: " ++ show bs)
200+
Right mv -> return mv
201+
155202
-- | This instance is based on and should be compatible with the 'Serialize' instance
156203
-- for 'BasicModuleInterface'.
157204
instance (MonadLogger m, MonadBlobStore m, MonadProtocolVersion m) => DirectBlobStorable m Module where
158-
loadDirect br = do
159-
bs <- loadRaw br
160-
let getModule = do
161-
-- Offset of the start of the module
162-
startOffset <- fromIntegral <$> bytesRead
163-
-- Header
164-
miModuleRef <- get
165-
miExposedInit <- getSafeSetOf get
166-
miExposedReceive <- getSafeMapOf get (getSafeSetOf get)
167-
-- Artifact is serialized as @InstrumentedModule v@.
168-
artVersion <- get
169-
artLen <- getWord32be
170-
-- Offset of the start of the artifact
171-
artOffset <- fromIntegral <$> bytesRead
172-
-- Skip the actual body of the artifact; we deserialize as a 'BlobPtr' instead.
173-
skip (fromIntegral artLen)
174-
-- Footer
175-
miModuleSize <- getWord64be
176-
let miModule :: PersistentInstrumentedModuleV v
177-
miModule =
178-
PIMVPtr
179-
BlobPtr
180-
{ theBlobPtr =
181-
-- Start of the blob ref
182-
theBlobRef br
183-
-- Add the size of the length field for the blob ref
184-
+ 8
185-
-- Add the offset of the artifact
186-
+ artOffset
187-
-- Subtract the starting offset
188-
- startOffset,
189-
blobPtrLen = fromIntegral artLen
190-
}
191-
moduleVInterface :: GSWasm.ModuleInterfaceA (PersistentInstrumentedModuleV v)
192-
moduleVInterface = GSWasm.ModuleInterface{..}
193-
case artVersion of
194-
V0 -> do
195-
moduleVSource <- get
196-
return $! ModuleV0 (ModuleV{..})
197-
V1 -> do
198-
moduleVSource <- get
199-
return $! ModuleV1 (ModuleV{..})
200-
case runGet getModule bs of
201-
Left e -> error (e ++ " :: " ++ show bs)
202-
Right mv@(ModuleV0 mv0@(ModuleV{moduleVInterface = GSWasm.ModuleInterface{miModule = PIMVPtr artPtr, ..}, ..})) | potentialLegacyArtifacts -> do
203-
artBS <- loadBlobPtr artPtr
204-
if GSWasm.isV0LegacyArtifact artBS
205-
then do
206-
logEvent GlobalState LLTrace $ "Recompiling V0 module " ++ show miModuleRef
207-
source <- loadRef moduleVSource
208-
case WasmV0.compileModule CSV0 source of
209-
Nothing -> error "Stored module that is not valid."
210-
Just (_, compiled) -> do
211-
return $! ModuleV0 mv0{moduleVInterface = (moduleVInterface mv0){GSWasm.miModule = PIMVMem compiled}}
212-
else return mv
213-
Right mv@(ModuleV1 mv1@(ModuleV{moduleVInterface = GSWasm.ModuleInterface{miModule = PIMVPtr artPtr, ..}, ..})) | potentialLegacyArtifacts -> do
214-
artBS <- loadBlobPtr artPtr
215-
if GSWasm.isV0LegacyArtifact artBS
216-
then do
217-
logEvent GlobalState LLTrace $ "Recompiling V1 module " ++ show miModuleRef
218-
source <- loadRef moduleVSource
219-
case WasmV1.compileModule (WasmV1.validationConfigAllowP1P6 CSV0) source of
220-
Nothing -> error "Stored module that is not valid."
221-
Just (_, compiled) -> do
222-
return $! ModuleV1 mv1{moduleVInterface = (moduleVInterface mv1){GSWasm.miModule = PIMVMem compiled}}
223-
else return mv
224-
Right mv -> return mv
205+
loadDirect br
206+
| potentialLegacyArtifacts = do
207+
mv <- loadModuleDirect br
208+
case mv of
209+
(ModuleV0 mv0@(ModuleV{moduleVInterface = GSWasm.ModuleInterface{miModule = PIMVPtr artPtr, ..}, ..})) -> do
210+
artBS <- loadBlobPtr artPtr
211+
if GSWasm.isV0LegacyArtifact artBS
212+
then do
213+
logEvent GlobalState LLTrace $ "Recompiling V0 module " ++ show miModuleRef
214+
source <- loadRef moduleVSource
215+
case WasmV0.compileModule CSV0 source of
216+
Nothing -> error "Stored module that is not valid."
217+
Just (_, compiled) -> do
218+
return $! ModuleV0 mv0{moduleVInterface = (moduleVInterface mv0){GSWasm.miModule = PIMVMem compiled}}
219+
else return mv
220+
(ModuleV1 mv1@(ModuleV{moduleVInterface = GSWasm.ModuleInterface{miModule = PIMVPtr artPtr, ..}, ..})) -> do
221+
artBS <- loadBlobPtr artPtr
222+
if GSWasm.isV0LegacyArtifact artBS
223+
then do
224+
logEvent GlobalState LLTrace $ "Recompiling V1 module " ++ show miModuleRef
225+
source <- loadRef moduleVSource
226+
case WasmV1.compileModule (WasmV1.validationConfigAllowP1P6 CSV0) source of
227+
Nothing -> error "Stored module that is not valid."
228+
Just (_, compiled) -> do
229+
return $! ModuleV1 mv1{moduleVInterface = (moduleVInterface mv1){GSWasm.miModule = PIMVMem compiled}}
230+
else return mv
231+
_ -> return mv
232+
| otherwise = loadModuleDirect br
225233
where
226234
-- When a node is running protocol 6 or lower it might have been started prior to the new notion of Wasm
227235
-- artifacts, which needs to be recompiled on load.
@@ -271,6 +279,14 @@ instance (MonadLogger m, MonadBlobStore m, MonadProtocolVersion m) => DirectBlob
271279
mkModule SV0 = ModuleV0
272280
mkModule SV1 = ModuleV1
273281

282+
instance (MonadBlobStore m) => DirectBlobHashable m Hash Module where
283+
loadHash br = do
284+
bs <- loadRaw br
285+
-- Decode the module reference only.
286+
case decode bs of
287+
Left e -> error $ "Could not decode stored module hash: " ++ e
288+
Right ModuleRef{..} -> return moduleRef
289+
274290
--------------------------------------------------------------------------------
275291

276292
-- | A cached 'Module' accessed via a cached 'Reference' i.e., a 'Reference'
@@ -320,13 +336,14 @@ instance (MonadProtocolVersion m, SupportsPersistentModule m) => BlobStorable m
320336
table <- load
321337
return $ do
322338
_modulesTable <- table
323-
_modulesMap <-
324-
foldl'
325-
( \m (idx, aModule) ->
326-
Map.insert (GSWasm.moduleReference aModule) idx m
339+
(_modulesMap, _) <-
340+
LFMB.mfoldRef
341+
( \(m, idx) modHCR -> do
342+
modRef <- ModuleRef <$> getHashM modHCR
343+
return $!! (Map.insert modRef idx m, idx + 1)
327344
)
328-
Map.empty
329-
<$> LFMB.toAscPairList _modulesTable
345+
(Map.empty, 0)
346+
_modulesTable
330347
return Modules{..}
331348
storeUpdate m@Modules{..} = do
332349
(pModulesTable, _modulesTable') <- storeUpdate _modulesTable
@@ -390,13 +407,24 @@ getInterface ::
390407
getInterface ref mods = fmap getModuleInterface <$> getModule ref mods
391408

392409
-- | Get the source of a module by module reference.
410+
-- This does not cache the module.
393411
getSource :: (MonadProtocolVersion m, SupportsPersistentModule m) => ModuleRef -> Modules -> m (Maybe WasmModule)
394412
getSource ref mods = do
395-
m <- getModule ref mods
396-
case m of
413+
mRef <- getModuleReference ref mods
414+
case mRef of
397415
Nothing -> return Nothing
398-
Just (ModuleV0 ModuleV{..}) -> Just . WasmModuleV0 <$> loadRef moduleVSource
399-
Just (ModuleV1 ModuleV{..}) -> Just . WasmModuleV1 <$> loadRef moduleVSource
416+
Just hcref -> do
417+
-- Since we only care about the source of the module, we can use 'loadModuleDirect',
418+
-- which will bypass recompiling the artifact. It will also not cache the module,
419+
-- but that is likely fine as the source is not cached anyway, and this is only
420+
-- ultimately used in GRPC queries.
421+
mdl <-
422+
openHashedCachedRef hcref >>= \case
423+
Left r -> loadModuleDirect r
424+
Right v -> return v
425+
case mdl of
426+
(ModuleV0 ModuleV{..}) -> Just . WasmModuleV0 <$> loadRef moduleVSource
427+
(ModuleV1 ModuleV{..}) -> Just . WasmModuleV1 <$> loadRef moduleVSource
400428

401429
-- | Get the list of all currently deployed modules.
402430
-- The order of the list is not specified.

concordium-consensus/src/Concordium/GlobalState/Persistent/CachedRef.hs

+27-6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ module Concordium.GlobalState.Persistent.CachedRef where
99

1010
import Control.Monad
1111
import Control.Monad.Trans
12+
import Data.Functor
1213
import Data.IORef
1314
import Data.Proxy
1415
import Data.Serialize (put)
@@ -437,9 +438,32 @@ makeFlushedHashedCachedRef val = do
437438
(br, _) <- storeUpdateDirect val
438439
return $! HCRFlushed br h
439440

441+
-- | Get the value underlying a 'HashedCachedRef' if it is in memory or the cache. Otherwise,
442+
-- return the 'BlobRef' to the value.
443+
openHashedCachedRef ::
444+
forall m h c a.
445+
( MonadCache c m,
446+
Cache c,
447+
CacheKey c ~ BlobRef a,
448+
CacheValue c ~ a
449+
) =>
450+
HashedCachedRef' h c a ->
451+
m (Either (BlobRef a) a)
452+
openHashedCachedRef HCRUnflushed{..} =
453+
liftIO (readIORef hcrUnflushed) >>= \case
454+
HCRMem val -> return (Right val)
455+
HCRMemHashed val _ -> return (Right val)
456+
HCRDisk r -> openHashedCachedRef r
457+
openHashedCachedRef HCRFlushed{..} =
458+
lookupCachedValue (Proxy @c) hcrBlob <&> \case
459+
Nothing -> Left hcrBlob
460+
Just val -> Right val
461+
462+
-- | Loading a 'HashedCachedRef' does not cache the value, but it does load the hash via 'loadHash'.
440463
instance
441464
( MonadCache c m,
442465
DirectBlobStorable m a,
466+
DirectBlobHashable m h a,
443467
Cache c,
444468
CacheKey c ~ BlobRef a,
445469
CacheValue c ~ a,
@@ -455,13 +479,10 @@ instance
455479
mref <- load
456480
return $ do
457481
hcrBlob <- mref
458-
val <-
482+
hcrHash <-
459483
lookupCachedValue (Proxy @c) hcrBlob >>= \case
460-
Nothing -> do
461-
val <- loadDirect hcrBlob
462-
putCachedValue (Proxy @c) hcrBlob val
463-
Just val -> return val
464-
hcrHash <- getHashM val
484+
Nothing -> loadHash hcrBlob
485+
Just val -> getHashM val
465486
return HCRFlushed{..}
466487

467488
-- | Caching a 'HashedCachedRef' does nothing on the principle that it is generally undesirable to

concordium-consensus/src/Concordium/GlobalState/Persistent/LFMBTree.hs

+12
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ module Concordium.GlobalState.Persistent.LFMBTree (
4949
fromAscListV,
5050

5151
-- * Traversal
52+
mfoldRef,
5253
mfold,
5354
mfoldDesc,
5455
migrateLFMBTree,
@@ -454,6 +455,17 @@ fromAscListNullable l = fromAscList $ go l 0
454455
| otherwise = (replicate (fromIntegral $ i - ix) Null) ++ go z i
455456
go [] _ = []
456457

458+
-- | Fold a monadic action over the leaves of the tree in ascending order of index.
459+
-- This is strict in the intermediate results.
460+
mfoldRef :: (CanStoreLFMBTree m ref1 l) => (a -> l -> m a) -> a -> LFMBTree' k ref1 l -> m a
461+
mfoldRef _ a0 Empty = return a0
462+
mfoldRef f !a0 (NonEmpty _ t) = mfoldT a0 t
463+
where
464+
mfoldT a (Leaf v) = f a v
465+
mfoldT a (Node _ l r) = do
466+
!a' <- mfoldT a =<< refLoad l
467+
mfoldT a' =<< refLoad r
468+
457469
-- | Fold a monadic action over the tree in ascending order of index.
458470
-- This is strict in the intermediate results.
459471
mfold :: (CanStoreLFMBTree m ref1 (ref2 v), Reference m ref2 v) => (a -> v -> m a) -> a -> LFMBTree' k ref1 (ref2 v) -> m a

0 commit comments

Comments
 (0)