1
1
const std = @import ("std" );
2
2
const log = std .log .scoped (.segment_merge_policy );
3
3
4
- const verbose = false ;
4
+ const verbose = true ;
5
5
6
6
pub fn MergeCandidate (comptime Segment : type ) type {
7
7
return struct {
@@ -24,6 +24,13 @@ pub fn TieredMergePolicy(comptime T: type) type {
24
24
segments_per_merge : u32 = 10 ,
25
25
segments_per_level : u32 = 10 ,
26
26
27
+ strategy : Strategy = .balanced ,
28
+
29
+ const Strategy = enum { // Selects the adjustment factor findSegmentsToMerge applies when scoring merge candidates.
30
+ balanced , // adjustment factor 1.2
31
+ aggressive , // adjustment factor 1.8; the factor scales a penalty subtracted from the score, and a lower score wins, so candidates spanning more segments are favored more strongly
32
+ };
33
+
27
34
const SegmentList = std .DoublyLinkedList (T );
28
35
const SegmentNode = SegmentList .Node ;
29
36
@@ -74,21 +81,34 @@ pub fn TieredMergePolicy(comptime T: type) type {
74
81
return num_allowed_segments + num_oversized_segments ;
75
82
}
76
83
77
- pub fn findSegmentsToMerge (self : Self , segments : SegmentList ) ? Candidate {
84
+ pub const FindSegmentsToMergeResult = struct { // Return value of findSegmentsToMerge: the computed budget plus the chosen merge, if any.
85
+ num_allowed_segments : usize , // segment budget returned by calculateBudget for the given segment list
86
+ candidate : ? Candidate , // null when the budget already covers all segments; otherwise the best candidate found (may still be null)
87
+ };
88
+
89
+ pub fn findSegmentsToMerge (self : Self , segments : SegmentList ) FindSegmentsToMergeResult {
78
90
const num_segments = segments .len ;
79
91
const num_allowed_segments = self .calculateBudget (segments );
80
92
log .debug ("budget: {} segments" , .{num_allowed_segments });
81
93
94
+ var result = FindSegmentsToMergeResult {
95
+ .num_allowed_segments = num_allowed_segments ,
96
+ .candidate = null ,
97
+ };
98
+
82
99
if (num_allowed_segments >= segments .len ) {
83
- return null ;
100
+ return result ;
84
101
}
85
102
86
103
const merge_factor = @min (self .segments_per_merge , self .segments_per_level );
87
104
const log_merge_factor = @log2 (@as (f64 , @floatFromInt (merge_factor )));
88
105
const log_min_segment_size = @log2 (@as (f64 , @floatFromInt (self .min_segment_size )));
89
106
90
107
const tier_scaling_factor = @as (f64 , @floatFromInt (num_allowed_segments )) / @as (f64 , @floatFromInt (num_segments )) / @as (f64 , @floatFromInt (self .segments_per_level ));
91
- var tier = @as (f64 , @floatFromInt (num_segments - 1 )) * tier_scaling_factor ;
108
+ const top_tier = @as (f64 , @floatFromInt (num_segments )) * tier_scaling_factor ;
109
+ var tier = top_tier ;
110
+
111
+ var segment_no : usize = 0 ;
92
112
93
113
var best_candidate : ? Candidate = null ;
94
114
var best_score : f64 = 0.0 ;
@@ -98,6 +118,7 @@ pub fn TieredMergePolicy(comptime T: type) type {
98
118
var iter = segments .first ;
99
119
while (iter ) | current_node | : (iter = current_node .next ) {
100
120
tier -= tier_scaling_factor ;
121
+ segment_no += 1 ;
101
122
102
123
if (current_node .data .getSize () > self .max_segment_size ) {
103
124
// skip oversized segments
@@ -126,7 +147,16 @@ pub fn TieredMergePolicy(comptime T: type) type {
126
147
127
148
const log_size = @log2 (@as (f64 , @floatFromInt (candidate .size )));
128
149
const candidate_tier = (log_size - log_min_segment_size ) / log_merge_factor ;
129
- const score = candidate_tier - tier ;
150
+ var score = candidate_tier - tier ;
151
+
152
+ const adjustment_factor : f64 = switch (self .strategy ) {
153
+ .balanced = > 1.2 ,
154
+ .aggressive = > 1.8 ,
155
+ };
156
+
157
+ const adjustment = @as (f64 , @floatFromInt (candidate .num_segments )) / @as (f64 , @floatFromInt (self .segments_per_merge ));
158
+ score = score - adjustment_factor * adjustment ;
159
+
130
160
// std.debug.print("candidate {}-{}: len={} size={} candidate_tier={}, score={d}\n", .{ candidate.start.data.id, candidate.end.data.id, candidate.num_segments, candidate.size, candidate_tier, score });
131
161
if (score < best_score or best_candidate == null ) {
132
162
best_candidate = candidate ;
@@ -142,7 +172,8 @@ pub fn TieredMergePolicy(comptime T: type) type {
142
172
max_merge_size = current_node .data .getSize ();
143
173
}
144
174
145
- return best_candidate ;
175
+ result .candidate = best_candidate ;
176
+ return result ;
146
177
}
147
178
};
148
179
}
@@ -186,6 +217,7 @@ test "TieredMergePolicy" {
186
217
.max_segment_size = 100000 ,
187
218
.segments_per_merge = 10 ,
188
219
.segments_per_level = 5 ,
220
+ .strategy = .aggressive ,
189
221
};
190
222
191
223
var last_id : u64 = 1 ;
@@ -200,6 +232,9 @@ test "TieredMergePolicy" {
200
232
last_id += 1 ;
201
233
}
202
234
235
+ var total_merge_size : u64 = 0 ;
236
+ var total_merge_count : u64 = 0 ;
237
+
203
238
for (0.. 1000) | _ | {
204
239
if (verbose ) {
205
240
std .debug .print ("---\n " , .{});
@@ -221,14 +256,22 @@ test "TieredMergePolicy" {
221
256
}
222
257
}
223
258
224
- const candidate = policy .findSegmentsToMerge (segments ) orelse continue ;
259
+ const result = policy .findSegmentsToMerge (segments );
260
+ const candidate = result .candidate orelse continue ;
261
+
262
+ total_merge_size += candidate .num_segments ;
263
+ total_merge_count += 1 ;
225
264
226
265
if (verbose ) {
227
266
std .debug .print ("merging {}-{}\n " , .{ candidate .start .data .id , candidate .end .data .id });
228
267
}
229
268
try applyMerge (MockSegment , & segments , candidate , std .testing .allocator );
230
269
}
231
270
271
+ if (verbose ) {
272
+ std .debug .print ("avg merge size: {}\n " , .{total_merge_size / total_merge_count });
273
+ }
274
+
232
275
const num_allowed_segmens = policy .calculateBudget (segments );
233
276
try std .testing .expect (num_allowed_segmens >= segments .len );
234
277
}
0 commit comments