Skip to content

Commit 9b90a3e

Browse files
committed
Rebase doc_ids to min_doc_id to reduce space
1 parent 54e022d commit 9b90a3e

File tree

2 files changed

+14
-14
lines changed

2 files changed

+14
-14
lines changed

src/FileSegment.zig

+2 -2
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ pub fn search(self: Self, sorted_hashes: []const u32, results: *SearchResults) !
8686
if (block_no != prev_block_no) {
8787
prev_block_no = block_no;
8888
const block_data = self.getBlockData(block_no);
89-
try filefmt.readBlock(block_data, &block_items);
89+
try filefmt.readBlock(block_data, &block_items, self.min_doc_id);
9090
}
9191
const matches = std.sort.equalRange(Item, Item{ .hash = hash, .id = 0 }, block_items.items, {}, Item.cmpByHash);
9292
for (matches[0]..matches[1]) |i| {
@@ -197,7 +197,7 @@ pub const Reader = struct {
197197
self.index = 0;
198198
const block_data = self.segment.getBlockData(self.block_no);
199199
self.block_no += 1;
200-
try filefmt.readBlock(block_data, &self.items);
200+
try filefmt.readBlock(block_data, &self.items, self.segment.min_doc_id);
201201
}
202202
return self.items.items[self.index];
203203
}

src/filefmt.zig

+12 -12
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ const BlockHeader = struct {
103103
first_item: Item,
104104
};
105105

106-
pub fn decodeBlockHeader(data: []const u8) !BlockHeader {
106+
pub fn decodeBlockHeader(data: []const u8, min_doc_id: u32) !BlockHeader {
107107
assert(data.len >= min_block_size);
108108

109109
const num_items = std.mem.readInt(u16, data[0..2], .little);
@@ -119,11 +119,11 @@ pub fn decodeBlockHeader(data: []const u8) !BlockHeader {
119119

120120
return .{
121121
.num_items = num_items,
122-
.first_item = Item{ .hash = hash.value, .id = id.value },
122+
.first_item = Item{ .hash = hash.value, .id = id.value + min_doc_id },
123123
};
124124
}
125125

126-
pub fn readBlock(data: []const u8, items: *std.ArrayList(Item)) !void {
126+
pub fn readBlock(data: []const u8, items: *std.ArrayList(Item), min_doc_id: u32) !void {
127127
var ptr: usize = 0;
128128

129129
if (data.len < 2) {
@@ -149,7 +149,7 @@ pub fn readBlock(data: []const u8, items: *std.ArrayList(Item)) !void {
149149
ptr += diff_doc_id.size;
150150

151151
last_hash += diff_hash.value;
152-
last_doc_id = if (diff_hash.value > 0) diff_doc_id.value else last_doc_id + diff_doc_id.value;
152+
last_doc_id = if (diff_hash.value > 0) diff_doc_id.value + min_doc_id else last_doc_id + diff_doc_id.value;
153153

154154
const item = items.addOneAssumeCapacity();
155155
item.* = .{ .hash = last_hash, .id = last_doc_id };
@@ -161,7 +161,7 @@ pub fn readBlock(data: []const u8, items: *std.ArrayList(Item)) !void {
161161
}
162162
}
163163

164-
pub fn encodeBlock(data: []u8, reader: anytype) !u16 {
164+
pub fn encodeBlock(data: []u8, reader: anytype, min_doc_id: u32) !u16 {
165165
assert(data.len >= 2);
166166

167167
var ptr: usize = 2;
@@ -174,7 +174,7 @@ pub fn encodeBlock(data: []u8, reader: anytype) !u16 {
174174
assert(item.hash > last_hash or (item.hash == last_hash and item.id >= last_doc_id));
175175

176176
const diff_hash = item.hash - last_hash;
177-
const diff_doc_id = if (diff_hash > 0) item.id else item.id - last_doc_id;
177+
const diff_doc_id = if (diff_hash > 0) item.id - min_doc_id else item.id - last_doc_id;
178178

179179
if (ptr + varint32Size(diff_hash) + varint32Size(diff_doc_id) > data.len) {
180180
break;
@@ -211,13 +211,13 @@ test "writeBlock/readBlock/readFirstItemFromBlock" {
211211
var block_data: [block_size]u8 = undefined;
212212

213213
var reader = segment.reader();
214-
const num_items = try encodeBlock(block_data[0..], &reader);
214+
const num_items = try encodeBlock(block_data[0..], &reader, 0);
215215
try testing.expectEqual(segment.items.items.len, num_items);
216216

217217
var items = std.ArrayList(Item).init(std.testing.allocator);
218218
defer items.deinit();
219219

220-
try readBlock(block_data[0..], &items);
220+
try readBlock(block_data[0..], &items, 0);
221221
try testing.expectEqualSlices(
222222
Item,
223223
&[_]Item{
@@ -230,7 +230,7 @@ test "writeBlock/readBlock/readFirstItemFromBlock" {
230230
items.items,
231231
);
232232

233-
const header = try decodeBlockHeader(block_data[0..]);
233+
const header = try decodeBlockHeader(block_data[0..], 0);
234234
try testing.expectEqual(items.items.len, header.num_items);
235235
try testing.expectEqual(items.items[0], header.first_item);
236236
}
@@ -343,7 +343,7 @@ pub fn writeSegmentFile(dir: std.fs.Dir, reader: anytype) !void {
343343

344344
var block_data: [block_size]u8 = undefined;
345345
while (true) {
346-
const n = try encodeBlock(block_data[0..], reader);
346+
const n = try encodeBlock(block_data[0..], reader, segment.min_doc_id);
347347
try writer.writeAll(block_data[0..]);
348348
if (n == 0) {
349349
break;
@@ -466,7 +466,7 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v
466466
var block_data = block_data_buffer[0..block_size];
467467
while (true) {
468468
try reader.readNoEof(block_data);
469-
const block_header = try decodeBlockHeader(block_data);
469+
const block_header = try decodeBlockHeader(block_data, segment.min_doc_id);
470470
if (block_header.num_items == 0) {
471471
break;
472472
}
@@ -534,7 +534,7 @@ test "writeFile/readFile" {
534534
var items = std.ArrayList(Item).init(testing.allocator);
535535
defer items.deinit();
536536

537-
try readBlock(segment.getBlockData(0), &items);
537+
try readBlock(segment.getBlockData(0), &items, segment.min_doc_id);
538538
try std.testing.expectEqualSlices(Item, &[_]Item{
539539
Item{ .hash = 1, .id = 1 },
540540
Item{ .hash = 2, .id = 1 },

0 commit comments

Comments
 (0)