Skip to content

Commit 5f32ee4

Browse files
authored
Merge pull request #35 from acoustid/attributes
Add support for attributes to assist external replication
2 parents e033ba6 + 72b3ec6 commit 5f32ee4

9 files changed

+141
-11
lines changed

build.zig.zon

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@
1616
.hash = "12201776681f1e5ec6df7df30786e90771c5de564c941a309c73c4299c7864ddb4c3",
1717
},
1818
.msgpack = .{
19-
.url = "git+https://github.com/lalinsky/msgpack.zig?ref=main#7c0a9846b33063199e56e50d683b4ca8785c773e",
20-
.hash = "12207a2d5cff5690049e70a0ce65c8a7b67bf385abc3acf86caa42db2a921c83a269",
19+
.url = "git+https://github.com/lalinsky/msgpack.zig?ref=v0.1.0#d141ef4e1f585fecbbcdac9a9f85e41b5759182c",
20+
.hash = "1220cb5fbd418638a830cb3c8a47d95d766d5ec1904631a14cde18cad89047165404",
2121
},
2222
},
2323
.paths = .{

src/FileSegment.zig

+2
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ pub const Options = struct {
2020
allocator: std.mem.Allocator,
2121
dir: std.fs.Dir,
2222
info: SegmentInfo = .{},
23+
attributes: std.AutoHashMapUnmanaged(u64, u64) = .{},
2324
docs: std.AutoHashMap(u32, bool),
2425
index: std.ArrayList(u32),
2526
block_size: usize = 0,
@@ -41,6 +42,7 @@ pub fn init(allocator: std.mem.Allocator, options: Options) Self {
4142
}
4243

4344
pub fn deinit(self: *Self, delete_file: KeepOrDelete) void {
45+
self.attributes.deinit(self.allocator);
4446
self.docs.deinit();
4547
self.index.deinit();
4648

src/Index.zig

+32-2
Original file line number | Diff line number | Diff line change
@@ -446,6 +446,11 @@ fn releaseSegments(self: *Self, segments: *SegmentsSnapshot) void {
446446
FileSegmentList.destroySegments(self.allocator, &segments.file_segments);
447447
}
448448

449+
/// Names of the snapshot fields that hold segment lists. `search` and
/// `getAttributes` walk this array with `inline for` and resolve each name
/// through `@field(snapshot, n)`, so file segments are always visited before
/// memory segments.
const segment_lists = [_][]const u8{
450+
"file_segments",
451+
"memory_segments",
452+
};
453+
449454
pub fn search(self: *Self, hashes: []const u32, allocator: std.mem.Allocator, deadline: Deadline) !SearchResults {
450455
const sorted_hashes = try allocator.dupe(u32, hashes);
451456
defer allocator.free(sorted_hashes);
@@ -457,14 +462,39 @@ pub fn search(self: *Self, hashes: []const u32, allocator: std.mem.Allocator, de
457462
var snapshot = self.acquireSegments();
458463
defer self.releaseSegments(&snapshot); // FIXME this possibly deletes orphaned segments, do it in a separate thread
459464

460-
try snapshot.file_segments.value.search(sorted_hashes, &results, deadline);
461-
try snapshot.memory_segments.value.search(sorted_hashes, &results, deadline);
465+
inline for (segment_lists) |n| {
466+
const segments = @field(snapshot, n);
467+
try segments.value.search(sorted_hashes, &results, deadline);
468+
}
462469

463470
results.sort();
464471

465472
return results;
466473
}
467474

475+
/// Collects the attributes of every segment in the current snapshot into a
/// new map allocated with `allocator`.
///
/// Segments are visited in `segment_lists` order. When the same key appears in
/// several segments, the value from the segment visited last is the one that
/// ends up in the result.
///
/// On success the returned map belongs to the caller, who must `deinit` it
/// with the same `allocator`. On failure the partially built map is freed
/// before the error is returned.
pub fn getAttributes(self: *Self, allocator: std.mem.Allocator) !std.AutoHashMapUnmanaged(u64, u64) {
476+
var result: std.AutoHashMapUnmanaged(u64, u64) = .{};
477+
// Runs only on the error path; on success ownership passes to the caller.
errdefer result.deinit(allocator);
478+
479+
var snapshot = self.acquireSegments();
480+
defer self.releaseSegments(&snapshot); // FIXME this possibly deletes orphaned segments, do it in a separate thread
481+
482+
// Version of the previously visited segment; carried across both segment lists.
var last_version: u64 = 0;
483+
inline for (segment_lists) |n| {
484+
const segments = @field(snapshot, n);
485+
for (segments.value.nodes.items) |node| {
486+
var iter = node.value.attributes.iterator();
487+
while (iter.next()) |entry| {
488+
// `put` replaces the value of an existing key, so later segments win.
try result.put(allocator, entry.key_ptr.*, entry.value_ptr.*);
489+
}
490+
// Invariant: versions strictly increase in iteration order. This also
// requires the first segment's version to be greater than zero.
std.debug.assert(node.value.info.version > last_version);
491+
last_version = node.value.info.version;
492+
}
493+
}
494+
495+
return result;
496+
}
497+
468498
test {
469499
_ = @import("index_tests.zig");
470500
}

src/MemorySegment.zig

+19
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ pub const Options = struct {};
1919

2020
allocator: std.mem.Allocator,
2121
info: SegmentInfo = .{},
22+
attributes: std.AutoHashMapUnmanaged(u64, u64) = .{},
2223
docs: std.AutoHashMap(u32, bool),
2324
items: std.ArrayList(Item),
2425
frozen: bool = false,
@@ -34,6 +35,8 @@ pub fn init(allocator: std.mem.Allocator, opts: Options) Self {
3435

3536
pub fn deinit(self: *Self, delete_file: KeepOrDelete) void {
3637
_ = delete_file;
38+
39+
self.attributes.deinit(self.allocator);
3740
self.docs.deinit();
3841
self.items.deinit();
3942
}
@@ -54,6 +57,7 @@ pub fn getSize(self: Self) usize {
5457
}
5558

5659
pub fn build(self: *Self, changes: []const Change) !void {
60+
var num_attributes: u32 = 0;
5761
var num_docs: u32 = 0;
5862
var num_items: usize = 0;
5963
for (changes) |change| {
@@ -65,9 +69,13 @@ pub fn build(self: *Self, changes: []const Change) !void {
6569
.delete => {
6670
num_docs += 1;
6771
},
72+
.set_attribute => {
73+
num_attributes += 1;
74+
},
6875
}
6976
}
7077

78+
try self.attributes.ensureTotalCapacity(self.allocator, num_attributes);
7179
try self.docs.ensureTotalCapacity(num_docs);
7280
try self.items.ensureTotalCapacity(num_items);
7381

@@ -92,6 +100,12 @@ pub fn build(self: *Self, changes: []const Change) !void {
92100
result.value_ptr.* = false;
93101
}
94102
},
103+
.set_attribute => |op| {
104+
const result = self.attributes.getOrPutAssumeCapacity(op.key);
105+
if (!result.found_existing) {
106+
result.value_ptr.* = op.value;
107+
}
108+
},
95109
}
96110
}
97111

@@ -103,8 +117,13 @@ pub fn cleanup(self: *Self) void {
103117
}
104118

105119
pub fn merge(self: *Self, merger: *SegmentMerger(Self)) !void {
120+
std.debug.assert(self.allocator.ptr == merger.allocator.ptr);
121+
106122
self.info = merger.segment.info;
107123

124+
self.attributes.deinit(self.allocator);
125+
self.attributes = merger.segment.attributes.move();
126+
108127
self.docs.deinit();
109128
self.docs = merger.segment.docs.move();
110129

src/change.zig

+10
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,19 @@ pub const Delete = struct {
1717
}
1818
};
1919

20+
/// Change record that assigns `value` to the attribute identified by `key`.
/// It is carried in the `set_attribute` variant of `Change`.
pub const SetAttribute = struct {
21+
key: u64,
22+
value: u64,
23+
24+
/// Selects the msgpack encoding for this struct: a map whose keys are
/// derived from the field names via `field_name_prefix = 1`.
/// NOTE(review): presumably a one-character prefix of each field name;
/// confirm against msgpack.zig.
pub fn msgpackFormat() msgpack.StructFormat {
25+
return .{ .as_map = .{ .key = .{ .field_name_prefix = 1 } } };
26+
}
27+
};
28+
2029
pub const Change = union(enum) {
2130
insert: Insert,
2231
delete: Delete,
32+
set_attribute: SetAttribute,
2333

2434
pub fn msgpackFormat() msgpack.UnionFormat {
2535
return .{ .as_map = .{ .key = .{ .field_name_prefix = 1 } } };

src/filefmt.zig

+42-6
Original file line number | Diff line number | Diff line change
@@ -238,36 +238,57 @@ const segment_file_header_magic_v1: u32 = 0x53474D31; // "SGM1" in big endian
238238
const segment_file_footer_magic_v1: u32 = @byteSwap(segment_file_header_magic_v1);
239239

240240
pub const SegmentFileHeader = struct {
241-
magic: u32 = segment_file_header_magic_v1,
242-
block_size: u32,
241+
magic: u32,
243242
info: SegmentInfo,
243+
has_attributes: bool,
244+
has_docs: bool,
245+
block_size: u32,
244246

245247
pub fn msgpackFormat() msgpack.StructFormat {
246248
return .{
247249
.as_map = .{
248-
.key = .field_index,
250+
.key = .field_index, // FIXME
249251
.omit_defaults = false,
250252
.omit_nulls = true,
251253
},
252254
};
253255
}
256+
257+
/// Returns the fixed one-byte key assigned to each header field. The switch
/// is exhaustive, so adding a field without giving it a key is a compile error.
/// NOTE(review): `msgpackFormat` still selects `.key = .field_index` (marked
/// FIXME); presumably these explicit keys are meant to replace it so the
/// on-disk keys no longer depend on field declaration order. Confirm.
pub fn msgpackFieldKey(field: std.meta.FieldEnum(@This())) u8 {
258+
return switch (field) {
259+
.magic => 0x00,
260+
.info => 0x01,
261+
.has_attributes => 0x02,
262+
.has_docs => 0x03,
263+
.block_size => 0x04,
264+
};
265+
}
254266
};
255267

256268
pub const SegmentFileFooter = struct {
257-
magic: u32 = segment_file_footer_magic_v1,
269+
magic: u32,
258270
num_items: u32,
259271
num_blocks: u32,
260272
checksum: u64,
261273

262274
pub fn msgpackFormat() msgpack.StructFormat {
263275
return .{
264276
.as_map = .{
265-
.key = .field_index,
277+
.key = .field_index, // FIXME
266278
.omit_defaults = false,
267279
.omit_nulls = true,
268280
},
269281
};
270282
}
283+
284+
/// Returns the fixed one-byte key assigned to each footer field. The switch
/// is exhaustive, so adding a field without giving it a key is a compile error.
/// NOTE(review): `msgpackFormat` still selects `.key = .field_index` (marked
/// FIXME); presumably these explicit keys are meant to replace it. Confirm.
pub fn msgpackFieldKey(field: std.meta.FieldEnum(@This())) u8 {
285+
return switch (field) {
286+
.magic => 0x00,
287+
.num_items => 0x01,
288+
.num_blocks => 0x02,
289+
.checksum => 0x03,
290+
};
291+
}
271292
};
272293

273294
pub fn deleteSegmentFile(dir: std.fs.Dir, info: SegmentInfo) !void {
@@ -299,11 +320,15 @@ pub fn writeSegmentFile(dir: std.fs.Dir, reader: anytype) !void {
299320
const packer = msgpack.packer(writer);
300321

301322
const header = SegmentFileHeader{
323+
.magic = segment_file_header_magic_v1,
302324
.block_size = block_size,
303325
.info = segment.info,
326+
.has_attributes = true,
327+
.has_docs = true,
304328
};
305329
try packer.write(SegmentFileHeader, header);
306330

331+
try packer.writeMap(segment.attributes);
307332
try packer.writeMap(segment.docs);
308333

309334
try buffered_writer.flush();
@@ -393,7 +418,18 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v
393418
segment.info = header.info;
394419
segment.block_size = header.block_size;
395420

396-
try unpacker.readMapInto(&segment.docs);
421+
if (header.has_attributes) {
422+
// FIXME nicer api in msgpack.zig
423+
var attributes = std.AutoHashMap(u64, u64).init(segment.allocator);
424+
defer attributes.deinit();
425+
try unpacker.readMapInto(&attributes);
426+
segment.attributes.deinit(segment.allocator);
427+
segment.attributes = attributes.unmanaged.move();
428+
}
429+
430+
if (header.has_docs) {
431+
try unpacker.readMapInto(&segment.docs);
432+
}
397433

398434
const block_size = header.block_size;
399435
const padding_size = block_size - fixed_buffer_stream.pos % block_size;

src/segment_merger.zig

+14
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ const SharedPtr = @import("utils/shared_ptr.zig").SharedPtr;
77

88
pub const MergedSegmentInfo = struct {
99
info: SegmentInfo = .{},
10+
attributes: std.AutoHashMapUnmanaged(u64, u64) = .{},
1011
docs: std.AutoHashMap(u32, bool),
1112
};
1213

@@ -76,16 +77,29 @@ pub fn SegmentMerger(comptime Segment: type) type {
7677
return error.NoSources;
7778
}
7879

80+
var total_attributes: u32 = 0;
7981
var total_docs: u32 = 0;
8082
for (sources, 0..) |source, i| {
8183
if (i == 0) {
8284
self.segment.info = source.reader.segment.info;
8385
} else {
8486
self.segment.info = SegmentInfo.merge(self.segment.info, source.reader.segment.info);
8587
}
88+
total_attributes += source.reader.segment.attributes.count();
8689
total_docs += source.reader.segment.docs.count();
8790
}
8891

92+
try self.segment.attributes.ensureTotalCapacity(self.allocator, total_attributes);
93+
for (sources) |*source| {
94+
const segment = source.reader.segment;
95+
var iter = segment.attributes.iterator();
96+
while (iter.next()) |entry| {
97+
const key = entry.key_ptr.*;
98+
const value = entry.value_ptr.*;
99+
self.segment.attributes.putAssumeCapacity(key, value);
100+
}
101+
}
102+
89103
try self.segment.docs.ensureTotalCapacity(total_docs);
90104
for (sources) |*source| {
91105
const segment = source.reader.segment;

src/server.zig

+19
Original file line number | Diff line number | Diff line change
@@ -270,16 +270,35 @@ fn handleHeadIndex(ctx: *Context, req: *httpz.Request, res: *httpz.Response) !vo
270270
return;
271271
}
272272

273+
/// Wrapper around an attribute map that gives it a custom JSON form.
const Attributes = struct {
274+
attributes: std.AutoHashMapUnmanaged(u64, u64),
275+
276+
/// Writes the map as a JSON array of two-element `[key, value]` arrays,
/// in the hash map's iteration order.
pub fn jsonStringify(self: Attributes, jws: anytype) !void {
277+
try jws.beginArray();
278+
var iter = self.attributes.iterator();
279+
while (iter.next()) |entry| {
280+
// One inner array per attribute: [key, value].
try jws.beginArray();
281+
try jws.write(entry.key_ptr.*);
282+
try jws.write(entry.value_ptr.*);
283+
try jws.endArray();
284+
}
285+
try jws.endArray();
286+
}
287+
};
288+
273289
const GetIndexResponse = struct {
274290
status: []const u8,
291+
attributes: Attributes,
275292
};
276293

277294
/// Handles a GET request for an index: looks the index up, gathers its
/// attributes and replies with a JSON `GetIndexResponse` whose status is "ok".
/// Returns without writing a response here when `getIndex` yields null.
fn handleGetIndex(ctx: *Context, req: *httpz.Request, res: *httpz.Response) !void {
278295
const index_ref = try getIndex(ctx, req, res, true) orelse return;
279296
defer releaseIndex(ctx, index_ref);
280297

298+
// The map is allocated from `req.arena` and is not explicitly deinitialized here.
const attributes = try index_ref.index.getAttributes(req.arena);
281299
const response = GetIndexResponse{
282300
.status = "ok",
301+
.attributes = .{ .attributes = attributes },
283302
};
284303
return res.json(&response, .{});
285304
}

src/utils/shared_ptr.zig

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ pub fn RefCounter(comptime T: type) type {
1010

1111
pub fn init() Self {
1212
return .{
13-
.refs = std.atomic.Value(u32).init(1),
13+
.refs = std.atomic.Value(T).init(1),
1414
};
1515
}
1616

0 commit comments

Comments
 (0)