Skip to content

Commit 189a439

Browse files
authored
Merge pull request #36 from acoustid/docs-unmanaged
Convert data structures to *Unmanaged
2 parents 5f32ee4 + 049b4e0 commit 189a439

5 files changed

+61
-54
lines changed

src/FileSegment.zig

+7-9
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@ allocator: std.mem.Allocator,
2121
dir: std.fs.Dir,
2222
info: SegmentInfo = .{},
2323
attributes: std.AutoHashMapUnmanaged(u64, u64) = .{},
24-
docs: std.AutoHashMap(u32, bool),
25-
index: std.ArrayList(u32),
24+
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
25+
index: std.ArrayListUnmanaged(u32) = .{},
2626
block_size: usize = 0,
2727
blocks: []const u8,
2828
merged: u32 = 0,
@@ -35,16 +35,14 @@ pub fn init(allocator: std.mem.Allocator, options: Options) Self {
3535
return Self{
3636
.allocator = allocator,
3737
.dir = options.dir,
38-
.docs = std.AutoHashMap(u32, bool).init(allocator),
39-
.index = std.ArrayList(u32).init(allocator),
4038
.blocks = undefined,
4139
};
4240
}
4341

4442
pub fn deinit(self: *Self, delete_file: KeepOrDelete) void {
4543
self.attributes.deinit(self.allocator);
46-
self.docs.deinit();
47-
self.index.deinit();
44+
self.docs.deinit(self.allocator);
45+
self.index.deinit(self.allocator);
4846

4947
if (self.raw_data) |data| {
5048
std.posix.munmap(data);
@@ -138,9 +136,9 @@ test "build" {
138136

139137
source.info = .{ .version = 1 };
140138
source.frozen = true;
141-
try source.docs.put(1, true);
142-
try source.items.append(.{ .id = 1, .hash = 1 });
143-
try source.items.append(.{ .id = 1, .hash = 2 });
139+
try source.docs.put(source.allocator, 1, true);
140+
try source.items.append(source.allocator, .{ .id = 1, .hash = 1 });
141+
try source.items.append(source.allocator, .{ .id = 1, .hash = 2 });
144142

145143
var source_reader = source.reader();
146144
defer source_reader.close();

src/MemorySegment.zig

+9-11
Original file line number | Diff line number | Diff line change
@@ -20,25 +20,23 @@ pub const Options = struct {};
2020
allocator: std.mem.Allocator,
2121
info: SegmentInfo = .{},
2222
attributes: std.AutoHashMapUnmanaged(u64, u64) = .{},
23-
docs: std.AutoHashMap(u32, bool),
24-
items: std.ArrayList(Item),
23+
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
24+
items: std.ArrayListUnmanaged(Item) = .{},
2525
frozen: bool = false,
2626

2727
pub fn init(allocator: std.mem.Allocator, opts: Options) Self {
2828
_ = opts;
2929
return .{
3030
.allocator = allocator,
31-
.docs = std.AutoHashMap(u32, bool).init(allocator),
32-
.items = std.ArrayList(Item).init(allocator),
3331
};
3432
}
3533

3634
pub fn deinit(self: *Self, delete_file: KeepOrDelete) void {
3735
_ = delete_file;
3836

3937
self.attributes.deinit(self.allocator);
40-
self.docs.deinit();
41-
self.items.deinit();
38+
self.docs.deinit(self.allocator);
39+
self.items.deinit(self.allocator);
4240
}
4341

4442
pub fn search(self: Self, sorted_hashes: []const u32, results: *SearchResults) !void {
@@ -76,8 +74,8 @@ pub fn build(self: *Self, changes: []const Change) !void {
7674
}
7775

7876
try self.attributes.ensureTotalCapacity(self.allocator, num_attributes);
79-
try self.docs.ensureTotalCapacity(num_docs);
80-
try self.items.ensureTotalCapacity(num_items);
77+
try self.docs.ensureTotalCapacity(self.allocator, num_docs);
78+
try self.items.ensureTotalCapacity(self.allocator, num_items);
8179

8280
var i = changes.len;
8381
while (i > 0) {
@@ -124,14 +122,14 @@ pub fn merge(self: *Self, merger: *SegmentMerger(Self)) !void {
124122
self.attributes.deinit(self.allocator);
125123
self.attributes = merger.segment.attributes.move();
126124

127-
self.docs.deinit();
125+
self.docs.deinit(self.allocator);
128126
self.docs = merger.segment.docs.move();
129127

130128
self.items.clearRetainingCapacity();
131-
try self.items.ensureTotalCapacity(merger.estimated_size);
129+
try self.items.ensureTotalCapacity(self.allocator, merger.estimated_size);
132130
while (true) {
133131
const item = try merger.read() orelse break;
134-
try self.items.append(item);
132+
try self.items.append(self.allocator, item);
135133
merger.advance();
136134
}
137135
}

src/filefmt.zig

+15-9
Original file line number | Diff line number | Diff line change
@@ -200,11 +200,12 @@ test "writeBlock/readBlock/readFirstItemFromBlock" {
200200
var segment = MemorySegment.init(std.testing.allocator, .{});
201201
defer segment.deinit(.delete);
202202

203-
try segment.items.append(.{ .hash = 1, .id = 1 });
204-
try segment.items.append(.{ .hash = 2, .id = 1 });
205-
try segment.items.append(.{ .hash = 3, .id = 1 });
206-
try segment.items.append(.{ .hash = 3, .id = 2 });
207-
try segment.items.append(.{ .hash = 4, .id = 1 });
203+
try segment.items.ensureTotalCapacity(std.testing.allocator, 5);
204+
segment.items.appendAssumeCapacity(.{ .hash = 1, .id = 1 });
205+
segment.items.appendAssumeCapacity(.{ .hash = 2, .id = 1 });
206+
segment.items.appendAssumeCapacity(.{ .hash = 3, .id = 1 });
207+
segment.items.appendAssumeCapacity(.{ .hash = 3, .id = 2 });
208+
segment.items.appendAssumeCapacity(.{ .hash = 4, .id = 1 });
208209

209210
const block_size = 1024;
210211
var block_data: [block_size]u8 = undefined;
@@ -428,7 +429,12 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v
428429
}
429430

430431
if (header.has_docs) {
431-
try unpacker.readMapInto(&segment.docs);
432+
// FIXME nicer api in msgpack.zig
433+
var docs = std.AutoHashMap(u32, bool).init(segment.allocator);
434+
defer docs.deinit();
435+
try unpacker.readMapInto(&docs);
436+
segment.docs.deinit(segment.allocator);
437+
segment.docs = docs.unmanaged.move();
432438
}
433439

434440
const block_size = header.block_size;
@@ -437,8 +443,8 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v
437443

438444
const blocks_data_start = fixed_buffer_stream.pos;
439445

440-
const estimated_block_count = (raw_data.len - fixed_buffer_stream.pos) / block_size;
441-
try segment.index.ensureTotalCapacity(estimated_block_count);
446+
const max_possible_block_count = (raw_data.len - fixed_buffer_stream.pos) / block_size;
447+
try segment.index.ensureTotalCapacity(segment.allocator, max_possible_block_count);
442448

443449
var num_items: u32 = 0;
444450
var num_blocks: u32 = 0;
@@ -452,7 +458,7 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v
452458
if (block_header.num_items == 0) {
453459
break;
454460
}
455-
try segment.index.append(block_header.first_item.hash);
461+
segment.index.appendAssumeCapacity(block_header.first_item.hash);
456462
num_items += block_header.num_items;
457463
num_blocks += 1;
458464
crc.update(block_data[0..]);

src/segment_list.zig

+2-2
Original file line number | Diff line number | Diff line change
@@ -209,11 +209,11 @@ pub fn SegmentListManager(Segment: type) type {
209209
var target = try List.createSegment(allocator, self.options);
210210
defer List.destroySegment(allocator, &target);
211211

212-
var merger = SegmentMerger(Segment).init(allocator, segments.value);
212+
var merger = try SegmentMerger(Segment).init(allocator, segments.value, candidate.end - candidate.start);
213213
defer merger.deinit();
214214

215215
for (segments.value.nodes.items[candidate.start..candidate.end]) |segment| {
216-
try merger.addSource(segment.value);
216+
merger.addSource(segment.value);
217217
}
218218
try merger.prepare();
219219

src/segment_merger.zig

+28-23
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,12 @@ const SharedPtr = @import("utils/shared_ptr.zig").SharedPtr;
88
pub const MergedSegmentInfo = struct {
99
info: SegmentInfo = .{},
1010
attributes: std.AutoHashMapUnmanaged(u64, u64) = .{},
11-
docs: std.AutoHashMap(u32, bool),
11+
docs: std.AutoHashMapUnmanaged(u32, bool) = .{},
12+
13+
pub fn deinit(self: *MergedSegmentInfo, allocator: std.mem.Allocator) void {
14+
self.attributes.deinit(allocator);
15+
self.docs.deinit(allocator);
16+
}
1217
};
1318

1419
pub fn SegmentMerger(comptime Segment: type) type {
@@ -17,7 +22,12 @@ pub fn SegmentMerger(comptime Segment: type) type {
1722

1823
const Source = struct {
1924
reader: Segment.Reader,
20-
skip_docs: std.AutoHashMap(u32, void),
25+
skip_docs: std.AutoHashMapUnmanaged(u32, void) = .{},
26+
27+
pub fn deinit(self: *Source, allocator: std.mem.Allocator) void {
28+
self.reader.close();
29+
self.skip_docs.deinit(allocator);
30+
}
2131

2232
pub fn read(self: *Source) !?Item {
2333
while (true) {
@@ -36,38 +46,33 @@ pub fn SegmentMerger(comptime Segment: type) type {
3646
};
3747

3848
allocator: std.mem.Allocator,
39-
sources: std.ArrayList(Source),
4049
collection: *SegmentList(Segment),
41-
segment: MergedSegmentInfo,
50+
sources: std.ArrayListUnmanaged(Source) = .{},
51+
segment: MergedSegmentInfo = .{},
4252
estimated_size: usize = 0,
4353

4454
current_item: ?Item = null,
4555

46-
pub fn init(allocator: std.mem.Allocator, collection: *SegmentList(Segment)) Self {
56+
pub fn init(allocator: std.mem.Allocator, collection: *SegmentList(Segment), num_sources: usize) !Self {
4757
return .{
4858
.allocator = allocator,
49-
.sources = std.ArrayList(Source).init(allocator),
5059
.collection = collection,
51-
.segment = .{
52-
.docs = std.AutoHashMap(u32, bool).init(allocator),
53-
},
60+
.sources = try std.ArrayListUnmanaged(Source).initCapacity(allocator, num_sources),
5461
};
5562
}
5663

5764
pub fn deinit(self: *Self) void {
5865
for (self.sources.items) |*source| {
59-
source.reader.close();
60-
source.skip_docs.deinit();
66+
source.deinit(self.allocator);
6167
}
62-
self.sources.deinit();
63-
self.segment.docs.deinit();
68+
self.sources.deinit(self.allocator);
69+
self.segment.deinit(self.allocator);
6470
self.* = undefined;
6571
}
6672

67-
pub fn addSource(self: *Self, source: *Segment) !void {
68-
try self.sources.append(.{
73+
pub fn addSource(self: *Self, source: *Segment) void {
74+
self.sources.appendAssumeCapacity(.{
6975
.reader = source.reader(),
70-
.skip_docs = std.AutoHashMap(u32, void).init(self.allocator),
7176
});
7277
}
7378

@@ -100,7 +105,7 @@ pub fn SegmentMerger(comptime Segment: type) type {
100105
}
101106
}
102107

103-
try self.segment.docs.ensureTotalCapacity(total_docs);
108+
try self.segment.docs.ensureTotalCapacity(self.allocator, total_docs);
104109
for (sources) |*source| {
105110
const segment = source.reader.segment;
106111
var docs_added: usize = 0;
@@ -111,10 +116,10 @@ pub fn SegmentMerger(comptime Segment: type) type {
111116
const doc_id = entry.key_ptr.*;
112117
const doc_status = entry.value_ptr.*;
113118
if (!self.collection.hasNewerVersion(doc_id, segment.info.version)) {
114-
try self.segment.docs.put(doc_id, doc_status);
119+
try self.segment.docs.put(self.allocator, doc_id, doc_status);
115120
docs_added += 1;
116121
} else {
117-
try source.skip_docs.put(doc_id, {});
122+
try source.skip_docs.put(self.allocator, doc_id, {});
118123
}
119124
}
120125
if (docs_found > 0) {
@@ -158,7 +163,7 @@ test "merge segments" {
158163
var collection = try SegmentList(MemorySegment).init(std.testing.allocator, 3);
159164
defer collection.deinit(std.testing.allocator, .delete);
160165

161-
var merger = SegmentMerger(MemorySegment).init(std.testing.allocator, &collection);
166+
var merger = try SegmentMerger(MemorySegment).init(std.testing.allocator, &collection, 3);
162167
defer merger.deinit();
163168

164169
var node1 = try SegmentList(MemorySegment).createSegment(std.testing.allocator, .{});
@@ -174,9 +179,9 @@ test "merge segments" {
174179
node2.value.info = .{ .version = 12, .merges = 0 };
175180
node3.value.info = .{ .version = 13, .merges = 0 };
176181

177-
try merger.addSource(node1.value);
178-
try merger.addSource(node2.value);
179-
try merger.addSource(node3.value);
182+
merger.addSource(node1.value);
183+
merger.addSource(node2.value);
184+
merger.addSource(node3.value);
180185

181186
try merger.prepare();
182187

0 commit comments

Comments
 (0)