Skip to content

Commit 7a3d17e

Browse files
committed
Track max_document_id
1 parent 2a2ae23 commit 7a3d17e

7 files changed

+38
-2
lines changed

src/FileSegment.zig

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ dir: std.fs.Dir,
2222
info: SegmentInfo = .{},
2323
attributes: std.StringHashMapUnmanaged(u64) = .{},
2424
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
25+
max_doc_id: u32 = 0,
2526
index: std.ArrayListUnmanaged(u32) = .{},
2627
block_size: usize = 0,
2728
blocks: []const u8,

src/IndexReader.zig

+12
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ pub fn getDocInfo(self: *Self, doc_id: u32) !?DocInfo {
5858
return null;
5959
}
6060

61+
/// Returns the largest document id reported by any of the reader's segment lists.
///
/// Each name in `segment_lists` is resolved to a field of `self` at comptime
/// (hence the `inline for` + `@field`), and that field's `value.getMaxDocId()`
/// is folded into a running maximum. The result is 0 when no list reports a
/// larger id.
/// NOTE(review): `segment_lists` is declared outside this hunk — presumably it
/// names the file- and memory-segment list fields; confirm against the file.
pub fn getMaxDocId(self: *Self) u32 {
62+
var result: u32 = 0;
63+
inline for (segment_lists) |n| {
64+
const segments = @field(self, n);
65+
result = @max(result, segments.value.getMaxDocId());
66+
}
67+
return result;
68+
}
69+
6170
pub fn getVersion(self: *Self) u64 {
6271
if (self.memory_segments.value.getLast()) |node| {
6372
return node.value.info.version;
@@ -86,5 +95,8 @@ pub fn getAttributes(self: *Self, allocator: std.mem.Allocator) !std.StringHashM
8695
}
8796
}
8897

98+
// builtin attributes
99+
try attributes.put(allocator, "max_document_id", self.getMaxDocId());
100+
89101
return attributes;
90102
}

src/MemorySegment.zig

+5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ allocator: std.mem.Allocator,
2121
info: SegmentInfo = .{},
2222
attributes: std.StringHashMapUnmanaged(u64) = .{},
2323
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
24+
max_doc_id: u32 = 0,
2425
items: std.ArrayListUnmanaged(Item) = .{},
2526
frozen: bool = false,
2627

@@ -81,6 +82,7 @@ pub fn build(self: *Self, changes: []const Change) !void {
8182
try self.docs.ensureTotalCapacity(self.allocator, num_docs);
8283
try self.items.ensureTotalCapacity(self.allocator, num_items);
8384

85+
self.max_doc_id = 0;
8486
var i = changes.len;
8587
while (i > 0) {
8688
i -= 1;
@@ -94,6 +96,7 @@ pub fn build(self: *Self, changes: []const Change) !void {
9496
for (op.hashes, 0..) |hash, j| {
9597
items[j] = .{ .hash = hash, .id = op.id };
9698
}
99+
self.max_doc_id = @max(self.max_doc_id, op.id);
97100
}
98101
},
99102
.delete => |op| {
@@ -131,6 +134,8 @@ pub fn merge(self: *Self, merger: *SegmentMerger(Self)) !void {
131134
self.docs.deinit(self.allocator);
132135
self.docs = merger.segment.docs.move();
133136

137+
self.max_doc_id = merger.segment.max_doc_id;
138+
134139
self.items.clearRetainingCapacity();
135140
try self.items.ensureTotalCapacity(self.allocator, merger.estimated_size);
136141
while (true) {

src/filefmt.zig

+6
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,12 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v
435435
try unpacker.readMapInto(&docs);
436436
segment.docs.deinit(segment.allocator);
437437
segment.docs = docs.unmanaged.move();
438+
439+
var iter = segment.docs.keyIterator();
440+
segment.max_doc_id = 0;
441+
while (iter.next()) |key_ptr| {
442+
segment.max_doc_id = @max(segment.max_doc_id, key_ptr.*);
443+
}
438444
}
439445

440446
const block_size = header.block_size;

src/segment_list.zig

+8
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,14 @@ pub fn SegmentList(Segment: type) type {
110110
}
111111
}
112112

113+
/// Returns the largest `max_doc_id` among the segments held in this list.
///
/// Walks `self.nodes.items` and keeps the maximum of each node's
/// `value.max_doc_id`; yields 0 when the list has no nodes (or every segment
/// reports 0).
pub fn getMaxDocId(self: Self) u32 {
114+
var result: u32 = 0;
115+
for (self.nodes.items) |node| {
116+
result = @max(result, node.value.max_doc_id);
117+
}
118+
return result;
119+
}
120+
113121
pub fn getDocInfo(self: Self, doc_id: u32) ?DocInfo {
114122
var result: ?DocInfo = null;
115123
for (self.nodes.items) |node| {

src/segment_merger.zig

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub const MergedSegmentInfo = struct {
99
info: SegmentInfo = .{},
1010
attributes: std.StringHashMapUnmanaged(u64) = .{},
1111
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
12+
max_doc_id: u32 = 0,
1213

1314
pub fn deinit(self: *MergedSegmentInfo, allocator: std.mem.Allocator) void {
1415
var iter = self.attributes.iterator();
@@ -115,6 +116,7 @@ pub fn SegmentMerger(comptime Segment: type) type {
115116
}
116117

117118
try self.segment.docs.ensureTotalCapacity(self.allocator, total_docs);
119+
self.segment.max_doc_id = 0;
118120
for (sources) |*source| {
119121
const segment = source.reader.segment;
120122
var docs_added: usize = 0;
@@ -127,6 +129,7 @@ pub fn SegmentMerger(comptime Segment: type) type {
127129
if (!self.collection.hasNewerVersion(doc_id, segment.info.version)) {
128130
try self.segment.docs.put(self.allocator, doc_id, doc_status);
129131
docs_added += 1;
132+
self.segment.max_doc_id = @max(self.segment.max_doc_id, doc_id);
130133
} else {
131134
try source.skip_docs.put(self.allocator, doc_id, {});
132135
}

tests/test_index_api.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def test_get_index_not_found(client, index_name):
2525
def test_get_index(client, index_name, create_index, fmt):
2626
req = client.post(f'/{index_name}/_update', json={
2727
'changes': [
28+
{'insert': {'id': 1, 'hashes': [101, 201, 301]}},
2829
{'set_attribute': {'name': 'foo', 'value': 1234}},
2930
],
3031
})
@@ -33,9 +34,9 @@ def test_get_index(client, index_name, create_index, fmt):
3334
req = client.get(f'/{index_name}', headers=headers(fmt))
3435
assert req.status_code == 200, req.content
3536
if fmt == 'json':
36-
expected = {'version': 1, 'segments': 1, 'attributes': {'foo': 1234}}
37+
expected = {'version': 1, 'segments': 1, 'attributes': {'foo': 1234, 'max_document_id': 1}}
3738
else:
38-
expected = {'v': 1, 's': 1, 'a': {'foo': 1234}}
39+
expected = {'v': 1, 's': 1, 'a': {'foo': 1234, 'max_document_id': 1}}
3940
assert decode(fmt, req.content) == expected
4041

4142

0 commit comments

Comments
 (0)