From ada9e0fc4d9571cf1062573f0c8d85fd3f55b7f6 Mon Sep 17 00:00:00 2001 From: Lukas Lalinsky Date: Thu, 5 Dec 2024 21:27:40 +0100 Subject: [PATCH 1/2] Add support for named attributes (strings) --- src/FileSegment.zig | 6 +++++- src/Index.zig | 4 ++-- src/MemorySegment.zig | 10 ++++++++-- src/change.zig | 2 +- src/filefmt.zig | 2 +- src/segment_merger.zig | 15 ++++++++++++--- src/server.zig | 18 ++++++++---------- tests/test_index_api.py | 12 +++++++++--- 8 files changed, 46 insertions(+), 23 deletions(-) diff --git a/src/FileSegment.zig b/src/FileSegment.zig index cc50319..31b3d1e 100644 --- a/src/FileSegment.zig +++ b/src/FileSegment.zig @@ -20,7 +20,7 @@ pub const Options = struct { allocator: std.mem.Allocator, dir: std.fs.Dir, info: SegmentInfo = .{}, -attributes: std.AutoHashMapUnmanaged(u64, u64) = .{}, +attributes: std.StringHashMapUnmanaged(u64) = .{}, docs: std.AutoHashMapUnmanaged(u32, bool) = .{}, index: std.ArrayListUnmanaged(u32) = .{}, block_size: usize = 0, @@ -40,6 +40,10 @@ pub fn init(allocator: std.mem.Allocator, options: Options) Self { } pub fn deinit(self: *Self, delete_file: KeepOrDelete) void { + var iter = self.attributes.iterator(); + while (iter.next()) |e| { + self.allocator.free(e.key_ptr.*); + } self.attributes.deinit(self.allocator); self.docs.deinit(self.allocator); self.index.deinit(self.allocator); diff --git a/src/Index.zig b/src/Index.zig index cc4fc27..c039fc8 100644 --- a/src/Index.zig +++ b/src/Index.zig @@ -501,7 +501,7 @@ pub fn getDocInfo(self: *Self, doc_id: u32) !?DocInfo { pub const IndexInfo = struct { version: u64, - attributes: std.AutoHashMapUnmanaged(u64, u64), + attributes: std.StringHashMapUnmanaged(u64), pub fn deinit(self: *IndexInfo, allocator: std.mem.Allocator) void { self.attributes.deinit(allocator); @@ -512,7 +512,7 @@ pub fn getInfo(self: *Self, allocator: std.mem.Allocator) !IndexInfo { var snapshot = self.acquireSegments(); defer self.releaseSegments(&snapshot); // FIXME this possibly deletes orphaned segments, do it in a separate thread - var attributes: std.AutoHashMapUnmanaged(u64, u64) = .{}; + var attributes: std.StringHashMapUnmanaged(u64) = .{}; errdefer attributes.deinit(allocator); var version: u64 = 0; diff --git a/src/MemorySegment.zig b/src/MemorySegment.zig index b26335f..41442b8 100644 --- a/src/MemorySegment.zig +++ b/src/MemorySegment.zig @@ -19,7 +19,7 @@ pub const Options = struct {}; allocator: std.mem.Allocator, info: SegmentInfo = .{}, -attributes: std.AutoHashMapUnmanaged(u64, u64) = .{}, +attributes: std.StringHashMapUnmanaged(u64) = .{}, docs: std.AutoHashMapUnmanaged(u32, bool) = .{}, items: std.ArrayListUnmanaged(Item) = .{}, frozen: bool = false, @@ -34,6 +34,10 @@ pub fn init(allocator: std.mem.Allocator, opts: Options) Self { pub fn deinit(self: *Self, delete_file: KeepOrDelete) void { _ = delete_file; + var iter = self.attributes.iterator(); + while (iter.next()) |e| { + self.allocator.free(e.key_ptr.*); + } self.attributes.deinit(self.allocator); self.docs.deinit(self.allocator); self.items.deinit(self.allocator); @@ -99,8 +103,10 @@ pub fn build(self: *Self, changes: []const Change) !void { } }, .set_attribute => |op| { - const result = self.attributes.getOrPutAssumeCapacity(op.key); + const result = self.attributes.getOrPutAssumeCapacity(op.name); if (!result.found_existing) { + errdefer self.attributes.removeByPtr(result.key_ptr); + result.key_ptr.* = try self.allocator.dupe(u8, op.name); result.value_ptr.* = op.value; } }, diff --git a/src/change.zig b/src/change.zig index 208a9a2..803ca1c 100644 --- a/src/change.zig +++ b/src/change.zig @@ -18,7 +18,7 @@ pub const Delete = struct { }; pub const SetAttribute = struct { - key: u64, + name: []const u8, value: u64, pub fn msgpackFormat() msgpack.StructFormat { diff --git a/src/filefmt.zig b/src/filefmt.zig index 03b9649..f5d89da 100644 --- a/src/filefmt.zig +++ b/src/filefmt.zig @@ -421,7 +421,7 @@ pub fn readSegmentFile(dir: fs.Dir, info: SegmentInfo, segment: *FileSegment) !v if (header.has_attributes) { // FIXME nicer api in msgpack.zig - var attributes = std.AutoHashMap(u64, u64).init(segment.allocator); + var attributes = std.StringHashMap(u64).init(segment.allocator); defer attributes.deinit(); try unpacker.readMapInto(&attributes); segment.attributes.deinit(segment.allocator); diff --git a/src/segment_merger.zig b/src/segment_merger.zig index 13ac404..b29f0bd 100644 --- a/src/segment_merger.zig +++ b/src/segment_merger.zig @@ -7,10 +7,14 @@ const SharedPtr = @import("utils/shared_ptr.zig").SharedPtr; pub const MergedSegmentInfo = struct { info: SegmentInfo = .{}, - attributes: std.AutoHashMapUnmanaged(u64, u64) = .{}, + attributes: std.StringHashMapUnmanaged(u64) = .{}, docs: std.AutoHashMapUnmanaged(u32, bool) = .{}, pub fn deinit(self: *MergedSegmentInfo, allocator: std.mem.Allocator) void { + var iter = self.attributes.iterator(); + while (iter.next()) |e| { + allocator.free(e.key_ptr.*); + } self.attributes.deinit(allocator); self.docs.deinit(allocator); } @@ -99,9 +103,14 @@ pub fn SegmentMerger(comptime Segment: type) type { const segment = source.reader.segment; var iter = segment.attributes.iterator(); while (iter.next()) |entry| { - const key = entry.key_ptr.*; + const name = entry.key_ptr.*; const value = entry.value_ptr.*; - self.segment.attributes.putAssumeCapacity(key, value); + const result = self.segment.attributes.getOrPutAssumeCapacity(name); + if (!result.found_existing) { + errdefer self.segment.attributes.removeByPtr(result.key_ptr); + result.key_ptr.* = try self.allocator.dupe(u8, name); + } + result.value_ptr.* = value; } } diff --git a/src/server.zig b/src/server.zig index e0283f5..8c1fe5b 100644 --- a/src/server.zig +++ b/src/server.zig @@ -243,14 +243,14 @@ fn getRequestBody(comptime T: type, req: *httpz.Request, res: *httpz.Response) ! switch (content_type) { .json => { - return json.parseFromSliceLeaky(T, req.arena, content, .{}) catch { - try writeErrorResponse(400, error.InvalidContent, req, res); + return json.parseFromSliceLeaky(T, req.arena, content, .{}) catch |err| { + try writeErrorResponse(400, err, req, res); return null; }; }, .msgpack => { - return msgpack.decodeFromSliceLeaky(T, req.arena, content) catch { - try writeErrorResponse(400, error.InvalidContent, req, res); + return msgpack.decodeFromSliceLeaky(T, req.arena, content) catch |err| { + try writeErrorResponse(400, err, req, res); return null; }; }, @@ -388,18 +388,16 @@ fn handleDeleteFingerprint(ctx: *Context, req: *httpz.Request, res: *httpz.Respo } const Attributes = struct { - attributes: std.AutoHashMapUnmanaged(u64, u64), + attributes: std.StringHashMapUnmanaged(u64), pub fn jsonStringify(self: Attributes, jws: anytype) !void { - try jws.beginArray(); + try jws.beginObject(); var iter = self.attributes.iterator(); while (iter.next()) |entry| { - try jws.beginArray(); - try jws.write(entry.key_ptr.*); + try jws.objectField(entry.key_ptr.*); try jws.write(entry.value_ptr.*); - try jws.endArray(); } - try jws.endArray(); + try jws.endObject(); } pub fn msgpackWrite(self: Attributes, packer: anytype) !void { diff --git a/tests/test_index_api.py b/tests/test_index_api.py index b65d5b7..bf4a73d 100644 --- a/tests/test_index_api.py +++ b/tests/test_index_api.py @@ -23,13 +23,19 @@ def test_get_index_not_found(client, index_name): @pytest.mark.parametrize('fmt', ['json', 'msgpack']) def test_get_index(client, index_name, create_index, fmt): - req = client.get(f'/{index_name}', headers=headers(fmt)) + req = client.post(f'/{index_name}/_update', json={ + 'changes': [ + {'set_attribute': {'name': 'foo', 'value': 1234}}, + ], + }) assert req.status_code == 200, req.content + req = client.get(f'/{index_name}', headers=headers(fmt)) + assert req.status_code == 200, req.content if fmt == 'json': - expected = {'version': 0, 'attributes': []} + expected = {'version': 1, 'attributes': {'foo': 1234}} else: - expected = {'v': 0, 'a': {}} + expected = {'v': 1, 'a': {'foo': 1234}} assert decode(fmt, req.content) == expected From a485ca0f3266727670f4197ee878cffd86e3f442 Mon Sep 17 00:00:00 2001 From: Lukas Lalinsky Date: Thu, 5 Dec 2024 21:40:54 +0100 Subject: [PATCH 2/2] Fix memory management in Index.getInfo --- src/Index.zig | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Index.zig b/src/Index.zig index c039fc8..13ea873 100644 --- a/src/Index.zig +++ b/src/Index.zig @@ -513,7 +513,13 @@ pub fn getInfo(self: *Self, allocator: std.mem.Allocator) !IndexInfo { defer self.releaseSegments(&snapshot); // FIXME this possibly deletes orphaned segments, do it in a separate thread var attributes: std.StringHashMapUnmanaged(u64) = .{}; - errdefer attributes.deinit(allocator); + errdefer { + var iter = attributes.iterator(); + while (iter.next()) |e| { + allocator.free(e.key_ptr.*); + } + attributes.deinit(allocator); + } var version: u64 = 0; inline for (segment_lists) |n| { @@ -521,7 +527,12 @@ pub fn getInfo(self: *Self, allocator: std.mem.Allocator) !IndexInfo { for (segments.value.nodes.items) |node| { var iter = node.value.attributes.iterator(); while (iter.next()) |entry| { - try attributes.put(allocator, entry.key_ptr.*, entry.value_ptr.*); + const result = try attributes.getOrPut(allocator, entry.key_ptr.*); + if (!result.found_existing) { + errdefer attributes.removeByPtr(entry.key_ptr); + result.key_ptr.* = try allocator.dupe(u8, entry.key_ptr.*); + } + result.value_ptr.* = entry.value_ptr.*; } std.debug.assert(node.value.info.version > version); version = node.value.info.version;