@@ -51,23 +51,13 @@ dir: std.fs.Dir,
 
 oplog: Oplog,
 
-memory_segments: SegmentListManager(MemorySegment),
-file_segments: SegmentListManager(FileSegment),
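+// open() runs under open_lock and hands loading off to load_task; is_ready
+// only becomes true once the background load has finished.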
+open_lock: std.Thread.Mutex = .{},
+is_ready: std.atomic.Value(bool),
+load_task: ?Scheduler.Task = null,
 
-// These segments are owned by the index and can't be accessed without acquiring segments_lock.
-// They can never be modified, only replaced.
 segments_lock: std.Thread.RwLock = .{},
-
-// These locks give partial access to the respective segments list.
-// 1) For memory_segments, new segment can be appended to the list without this lock.
-// 2) For file_segments, no write operation can happen without this lock.
-// These lock can be only acquired before segments_lock, never after, to avoid deadlock situatons.
-// They are mostly useful to allowing read access to segments during merge/checkpoint, without blocking real-time update.
-file_segments_lock: std.Thread.Mutex = .{},
-memory_segments_lock: std.Thread.Mutex = .{},
-
-// Mutex used to control linearity of updates.
-update_lock: std.Thread.Mutex = .{},
+memory_segments: SegmentListManager(MemorySegment),
+file_segments: SegmentListManager(FileSegment),
 
 checkpoint_task: ?Scheduler.Task = null,
 file_segment_merge_task: ?Scheduler.Task = null,
@@ -123,12 +113,17 @@ pub fn init(allocator: std.mem.Allocator, scheduler: *Scheduler, parent_dir: std
         .segments_lock = .{},
         .memory_segments = memory_segments,
         .file_segments = file_segments,
+        .is_ready = std.atomic.Value(bool).init(false),
     };
 }
 
 pub fn deinit(self: *Self) void {
     log.info("closing index {}", .{@intFromPtr(self)});
 
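+    // open() may have scheduled a load task; clean it up like the other
+    // background tasks below.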
+    if (self.load_task) |task| {
+        self.scheduler.destroyTask(task);
+    }
+
     if (self.checkpoint_task) |task| {
         self.scheduler.destroyTask(task);
     }
@@ -148,36 +143,8 @@ pub fn deinit(self: *Self) void {
     self.dir.close();
 }
 
-fn loadSegments(self: *Self, create: bool) !u64 {
-    self.segments_lock.lock();
-    defer self.segments_lock.unlock();
-
-    const segment_ids = filefmt.readManifestFile(self.dir, self.allocator) catch |err| {
-        if (err == error.FileNotFound) {
-            if (create) {
-                try self.updateManifestFile(self.file_segments.segments.value);
-                return 0;
-            }
-            return error.IndexNotFound;
-        }
-        return err;
-    };
-    defer self.allocator.free(segment_ids);
-    log.info("found {} segments in manifest", .{segment_ids.len});
-
-    try self.file_segments.segments.value.nodes.ensureTotalCapacity(self.allocator, segment_ids.len);
-    var last_commit_id: u64 = 0;
-    for (segment_ids, 1..) |segment_id, i| {
-        const node = try FileSegmentList.loadSegment(self.allocator, segment_id, .{ .dir = self.dir });
-        self.file_segments.segments.value.nodes.appendAssumeCapacity(node);
-        last_commit_id = node.value.info.getLastCommitId();
-        log.info("loaded segment {} ({}/{})", .{ last_commit_id, i, segment_ids.len });
-    }
-    return last_commit_id;
-}
-
 fn doCheckpoint(self: *Self) !bool {
-    var snapshot = self.acquireReader();
+    var snapshot = try self.acquireReader();
     defer self.releaseReader(&snapshot);
 
     const source = snapshot.memory_segments.value.getFirst() orelse return false;
@@ -234,7 +201,7 @@ fn doCheckpoint(self: *Self) !bool {
 }
 
 fn updateDocsMetrics(self: *Self) void {
-    var snapshot = self.acquireReader();
+    var snapshot = self.acquireReader() catch return;
     defer self.releaseReader(&snapshot);
 
     metrics.docs(self.name, snapshot.getNumDocs());
@@ -320,30 +287,81 @@ fn maybeMergeMemorySegments(self: *Self) !bool {
 }
 
 pub fn open(self: *Self, create: bool) !void {
-    const last_commit_id = try self.loadSegments(create);
+    self.open_lock.lock();
+    defer self.open_lock.unlock();
+
+    if (self.is_ready.load(.monotonic)) {
+        return;
+    }
+
+    if (self.load_task != null) {
+        return error.AlreadyOpening;
+    }
+
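+    // Read the manifest synchronously so a missing index is reported from
+    // open() itself; the heavier segment loading runs on a background task.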
+    const manifest = filefmt.readManifestFile(self.dir, self.allocator) catch |err| {
+        if (err == error.FileNotFound) {
+            if (create) {
+                try self.updateManifestFile(self.file_segments.segments.value);
+                try self.load(&.{});
+                return;
+            }
+            return error.IndexNotFound;
+        }
+        return err;
+    };
+    errdefer self.allocator.free(manifest);
+
+    self.load_task = try self.scheduler.createTask(.medium, loadTask, .{ self, manifest });
+    self.scheduler.scheduleTask(self.load_task.?);
+}
+
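+// Loads the segments listed in the manifest and replays the oplog. Runs
+// inline from open() when creating a new index, otherwise via loadTask.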
+fn load(self: *Self, manifest: []SegmentInfo) !void {
+    defer self.allocator.free(manifest);
+
+    log.info("found {} segments in manifest", .{manifest.len});
+
+    try self.file_segments.segments.value.nodes.ensureTotalCapacity(self.allocator, manifest.len);
+    var last_commit_id: u64 = 0;
+    for (manifest, 1..) |segment_id, i| {
+        const node = try FileSegmentList.loadSegment(self.allocator, segment_id, .{ .dir = self.dir });
+        self.file_segments.segments.value.nodes.appendAssumeCapacity(node);
+        last_commit_id = node.value.info.getLastCommitId();
+        log.info("loaded segment {} ({}/{})", .{ last_commit_id, i, manifest.len });
+    }
 
-    self.checkpoint_task = try self.scheduler.createTask(.medium, checkpointTask, .{self});
     self.memory_segment_merge_task = try self.scheduler.createTask(.high, memorySegmentMergeTask, .{self});
+    self.checkpoint_task = try self.scheduler.createTask(.medium, checkpointTask, .{self});
     self.file_segment_merge_task = try self.scheduler.createTask(.low, fileSegmentMergeTask, .{self});
 
-    try self.oplog.open(last_commit_id + 1, updateInternal, self);
+    try self.oplog.open(1, updateInternal, self);
 
     log.info("index loaded", .{});
+
+    self.is_ready.store(true, .monotonic);
+}
+
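+// Runs on the scheduler: open_lock keeps it from racing a concurrent open(),
+// and errors are only logged because no caller can receive them.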
+fn loadTask(self: *Self, manifest: []SegmentInfo) void {
+    self.open_lock.lock();
+    defer self.open_lock.unlock();
+
+    self.load(manifest) catch |err| {
+        log.err("load failed: {}", .{err});
+    };
 }
 
 fn maybeScheduleMemorySegmentMerge(self: *Self) void {
     if (self.memory_segments.needsMerge()) {
-        log.debug("too many memory segments, scheduling merging", .{});
         if (self.memory_segment_merge_task) |task| {
+            log.debug("too many memory segments, scheduling merging", .{});
             self.scheduler.scheduleTask(task);
         }
     }
 }
 
 fn maybeScheduleFileSegmentMerge(self: *Self) void {
     if (self.file_segments.needsMerge()) {
-        log.debug("too many file segments, scheduling merging", .{});
         if (self.file_segment_merge_task) |task| {
+            log.debug("too many file segments, scheduling merging", .{});
             self.scheduler.scheduleTask(task);
         }
     }
@@ -352,8 +370,8 @@ fn maybeScheduleFileSegmentMerge(self: *Self) void {
 fn maybeScheduleCheckpoint(self: *Self) void {
     if (self.memory_segments.segments.value.getFirst()) |first_node| {
         if (first_node.value.getSize() >= self.options.min_segment_size) {
-            log.debug("the first memory segment is too big, scheduling checkpoint", .{});
             if (self.checkpoint_task) |task| {
+                log.debug("the first memory segment is too big, scheduling checkpoint", .{});
                 self.scheduler.scheduleTask(task);
             }
         }
@@ -372,11 +390,18 @@ fn readyForCheckpoint(self: *Self) ?MemorySegmentNode {
     return null;
 }
 
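+// Callers that touch segments must fail fast until the background load has
+// completed; update() and acquireReader() gate on this.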
+fn checkIfReady(self: Self) !void {
+    if (!self.is_ready.load(.monotonic)) {
+        return error.IndexNotReady;
+    }
+}
+
 pub fn update(self: *Self, changes: []const Change) !void {
+    try self.checkIfReady();
     try self.updateInternal(changes, null);
 }
 
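+// Only called from update() and from oplog replay during load(); no longer
+// part of the public API.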
-pub fn updateInternal(self: *Self, changes: []const Change, commit_id: ?u64) !void {
+fn updateInternal(self: *Self, changes: []const Change, commit_id: ?u64) !void {
     var target = try MemorySegmentList.createSegment(self.allocator, .{});
     defer MemorySegmentList.destroySegment(self.allocator, &target);
 
@@ -400,7 +425,9 @@ pub fn updateInternal(self: *Self, changes: []const Change, commit_id: ?u64) !void
     self.maybeScheduleCheckpoint();
 }
 
-pub fn acquireReader(self: *Self) IndexReader {
+pub fn acquireReader(self: *Self) !IndexReader {
+    try self.checkIfReady();
+
     self.segments_lock.lockShared();
     defer self.segments_lock.unlockShared();
 
@@ -416,7 +443,7 @@ pub fn releaseReader(self: *Self, reader: *IndexReader) void {
 }
 
 pub fn search(self: *Self, hashes: []const u32, allocator: std.mem.Allocator, deadline: Deadline) !SearchResults {
-    var reader = self.acquireReader();
+    var reader = try self.acquireReader();
     defer self.releaseReader(&reader);
 
     return reader.search(hashes, allocator, deadline);