Skip to content

Commit a2813b9

Browse files
committed
Track min_document_id
1 parent b071a56 commit a2813b9

7 files changed

+56
-7
lines changed

src/FileSegment.zig

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ dir: std.fs.Dir,
2222
info: SegmentInfo = .{},
2323
attributes: std.StringHashMapUnmanaged(u64) = .{},
2424
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
25+
min_doc_id: u32 = 0,
2526
max_doc_id: u32 = 0,
2627
index: std.ArrayListUnmanaged(u32) = .{},
2728
block_size: usize = 0,

src/IndexReader.zig

+17-1
Original file line number | Diff line number | Diff line change
@@ -58,11 +58,26 @@ pub fn getDocInfo(self: *Self, doc_id: u32) !?DocInfo {
5858
return null;
5959
}
6060

61+
pub fn getMinDocId(self: *Self) u32 {
62+
var result: u32 = 0;
63+
inline for (segment_lists) |n| {
64+
const segments = @field(self, n);
65+
const doc_id = segments.value.getMinDocId();
66+
if (result == 0 or doc_id < result) {
67+
result = doc_id;
68+
}
69+
}
70+
return result;
71+
}
72+
6173
pub fn getMaxDocId(self: *Self) u32 {
6274
var result: u32 = 0;
6375
inline for (segment_lists) |n| {
6476
const segments = @field(self, n);
65-
result = @max(result, segments.value.getMaxDocId());
77+
const doc_id = segments.value.getMaxDocId();
78+
if (result == 0 or doc_id > result) {
79+
result = doc_id;
80+
}
6681
}
6782
return result;
6883
}
@@ -96,6 +111,7 @@ pub fn getAttributes(self: *Self, allocator: std.mem.Allocator) !std.StringHashM
96111
}
97112

98113
// builtin attributes
114+
try attributes.put(allocator, "min_document_id", self.getMinDocId());
99115
try attributes.put(allocator, "max_document_id", self.getMaxDocId());
100116

101117
return attributes;

src/MemorySegment.zig

+8-1
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ allocator: std.mem.Allocator,
2121
info: SegmentInfo = .{},
2222
attributes: std.StringHashMapUnmanaged(u64) = .{},
2323
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
24+
min_doc_id: u32 = 0,
2425
max_doc_id: u32 = 0,
2526
items: std.ArrayListUnmanaged(Item) = .{},
2627
frozen: bool = false,
@@ -82,6 +83,7 @@ pub fn build(self: *Self, changes: []const Change) !void {
8283
try self.docs.ensureTotalCapacity(self.allocator, num_docs);
8384
try self.items.ensureTotalCapacity(self.allocator, num_items);
8485

86+
self.min_doc_id = 0;
8587
self.max_doc_id = 0;
8688
var i = changes.len;
8789
while (i > 0) {
@@ -96,7 +98,12 @@ pub fn build(self: *Self, changes: []const Change) !void {
9698
for (op.hashes, 0..) |hash, j| {
9799
items[j] = .{ .hash = hash, .id = op.id };
98100
}
99-
self.max_doc_id = @max(self.max_doc_id, op.id);
101+
if (self.min_doc_id == 0 or op.id < self.min_doc_id) {
102+
self.min_doc_id = op.id;
103+
}
104+
if (self.max_doc_id == 0 or op.id > self.max_doc_id) {
105+
self.max_doc_id = op.id;
106+
}
100107
}
101108
},
102109
.delete => |op| {

src/filefmt.zig

+7-1
Original file line number | Diff line number | Diff line change
@@ -437,9 +437,15 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v
437437
segment.docs = docs.unmanaged.move();
438438

439439
var iter = segment.docs.keyIterator();
440+
segment.min_doc_id = 0;
440441
segment.max_doc_id = 0;
441442
while (iter.next()) |key_ptr| {
442-
segment.max_doc_id = @max(segment.max_doc_id, key_ptr.*);
443+
if (segment.min_doc_id == 0 or key_ptr.* < segment.min_doc_id) {
444+
segment.min_doc_id = key_ptr.*;
445+
}
446+
if (segment.max_doc_id == 0 or key_ptr.* > segment.max_doc_id) {
447+
segment.max_doc_id = key_ptr.*;
448+
}
443449
}
444450
}
445451

src/segment_list.zig

+13-1
Original file line number | Diff line number | Diff line change
@@ -110,10 +110,22 @@ pub fn SegmentList(Segment: type) type {
110110
}
111111
}
112112

113+
pub fn getMinDocId(self: Self) u32 {
114+
var result: u32 = 0;
115+
for (self.nodes.items) |node| {
116+
if (result == 0 or node.value.min_doc_id < result) {
117+
result = node.value.min_doc_id;
118+
}
119+
}
120+
return result;
121+
}
122+
113123
pub fn getMaxDocId(self: Self) u32 {
114124
var result: u32 = 0;
115125
for (self.nodes.items) |node| {
116-
result = @max(result, node.value.max_doc_id);
126+
if (result == 0 or node.value.max_doc_id > result) {
127+
result = node.value.max_doc_id;
128+
}
117129
}
118130
return result;
119131
}

src/segment_merger.zig

+8-1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ pub const MergedSegmentInfo = struct {
99
info: SegmentInfo = .{},
1010
attributes: std.StringHashMapUnmanaged(u64) = .{},
1111
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
12+
min_doc_id: u32 = 0,
1213
max_doc_id: u32 = 0,
1314

1415
pub fn deinit(self: *MergedSegmentInfo, allocator: std.mem.Allocator) void {
@@ -116,6 +117,7 @@ pub fn SegmentMerger(comptime Segment: type) type {
116117
}
117118

118119
try self.segment.docs.ensureTotalCapacity(self.allocator, total_docs);
120+
self.segment.min_doc_id = 0;
119121
self.segment.max_doc_id = 0;
120122
for (sources) |*source| {
121123
const segment = source.reader.segment;
@@ -129,7 +131,12 @@ pub fn SegmentMerger(comptime Segment: type) type {
129131
if (!self.collection.hasNewerVersion(doc_id, segment.info.version)) {
130132
try self.segment.docs.put(self.allocator, doc_id, doc_status);
131133
docs_added += 1;
132-
self.segment.max_doc_id = @max(self.segment.max_doc_id, doc_id);
134+
if (self.segment.min_doc_id == 0 or doc_id < self.segment.min_doc_id) {
135+
self.segment.min_doc_id = doc_id;
136+
}
137+
if (self.segment.max_doc_id == 0 or doc_id > self.segment.max_doc_id) {
138+
self.segment.max_doc_id = doc_id;
139+
}
133140
} else {
134141
try source.skip_docs.put(self.allocator, doc_id, {});
135142
}

tests/test_index_api.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,9 @@ def test_get_index(client, index_name, create_index, fmt):
3434
req = client.get(f'/{index_name}', headers=headers(fmt))
3535
assert req.status_code == 200, req.content
3636
if fmt == 'json':
37-
expected = {'version': 1, 'segments': 1, 'attributes': {'foo': 1234, 'max_document_id': 1}}
37+
expected = {'version': 1, 'segments': 1, 'attributes': {'foo': 1234, 'min_document_id': 1, 'max_document_id': 1}}
3838
else:
39-
expected = {'v': 1, 's': 1, 'a': {'foo': 1234, 'max_document_id': 1}}
39+
expected = {'v': 1, 's': 1, 'a': {'foo': 1234, 'min_document_id': 1, 'max_document_id': 1}}
4040
assert decode(fmt, req.content) == expected
4141

4242

0 commit comments

Comments
 (0)