Make explicit a scenario where the scheduling yields unbalanced solutions

rdettai · rdettai · commit dd9db2194606 · 2025-06-10T14:45:11.000+02:00
diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs
@@ -576,7 +576,7 @@ fn add_shard_to_indexer(
     }
 }
 
-// If the total node capacities is lower than 110% of the problem load, this
+// If the total node capacities is lower than 120% of the problem load, this
 // function scales the load of the indexer to reach this limit.
 fn inflate_node_capacities_if_necessary(problem: &mut SchedulingProblem) {
     // First we scale the problem to the point where any indexer can fit the largest shard.
diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic.rs
@@ -18,7 +18,6 @@ use std::collections::btree_map::Entry;
 
 use itertools::Itertools;
 use quickwit_proto::indexing::CpuCapacity;
-use tracing::warn;
 
 use super::scheduling_logic_model::*;
 use crate::indexing_scheduler::scheduling::inflate_node_capacities_if_necessary;
@@ -41,7 +40,7 @@ pub fn solve(
     previous_solution: SchedulingSolution,
 ) -> SchedulingSolution {
     // We first inflate the indexer capacities to make sure they globally
-    // have at least 110% of the total problem load. This is done proportionally
+    // have at least 120% of the total problem load. This is done proportionally
     // to their original capacity.
     inflate_node_capacities_if_necessary(&mut problem);
     // As a heuristic, to offer stability, we work iteratively
@@ -294,21 +293,23 @@ fn place_unassigned_shards_ignoring_affinity(
         Reverse(load)
     });
 
-    // Thanks to the call to `inflate_node_capacities_if_necessary`,
-    // we are certain that even on our first attempt, the total capacity of the indexer exceeds 120%
-    // of the partial solution.
+    // Thanks to the call to `inflate_node_capacities_if_necessary`, we are
+    // certain that even on our first attempt, the total capacity of the indexer
+    // exceeds 120% of the partial solution. If a large shard needs to be placed
+    // in an already well balanced solution, it may not fit on any node. In that
+    // case, we iteratively grow the virtual capacity until it can be placed.
     //
-    // 1.2^30 is about 240.
-    // If we reach 30 attempts we are certain to have a logical bug.
+    // 1.2^30 is about 240. If we reach 30 attempts we are certain to have a
+    // logical bug.
     for attempt_number in 0..30 {
         match attempt_place_unassigned_shards(&unassigned_shards[..], &problem, partial_solution) {
-            Ok(solution) => {
-                if attempt_number != 0 {
-                    warn!(
-                        attempt_number = attempt_number,
-                        "required to scale node capacity"
-                    );
-                }
+            Ok(mut solution) => {
+                // the higher the attempt number, the more unbalanced the solution
+                tracing::warn!(
+                    attempt_number = attempt_number,
+                    "capacity re-scaled, scheduling solution likely unbalanced"
+                );
+                solution.capacity_scaling_iterations = attempt_number;
                 return solution;
             }
             Err(NotEnoughCapacity) => {
@@ -783,4 +784,19 @@ mod tests {
             solve(problem, solution);
         }
     }
+
+    #[test]
+    fn test_capacity_scaling_iteration_required() {
+        // Create a problem where affinity constraints cause suboptimal placement
+        // requiring iterative scaling despite initial capacity scaling.
+        let mut problem =
+            SchedulingProblem::with_indexer_cpu_capacities(vec![mcpu(3000), mcpu(3000)]);
+        problem.add_source(1, NonZeroU32::new(2500).unwrap()); // Source 0
+        problem.add_source(1, NonZeroU32::new(2500).unwrap()); // Source 1
+        problem.add_source(1, NonZeroU32::new(1500).unwrap()); // Source 2
+        let previous_solution = problem.new_solution();
+        let solution = solve(problem, previous_solution);
+
+        assert_eq!(solution.capacity_scaling_iterations, 1);
+    }
 }
diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic_model.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic_model.rs
@@ -232,12 +232,15 @@ impl IndexerAssignment {
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct SchedulingSolution {
     pub indexer_assignments: Vec<IndexerAssignment>,
+    // used for tests
+    pub capacity_scaling_iterations: usize,
 }
 
 impl SchedulingSolution {
     pub fn with_num_indexers(num_indexers: usize) -> SchedulingSolution {
         SchedulingSolution {
             indexer_assignments: (0..num_indexers).map(IndexerAssignment::new).collect(),
+            capacity_scaling_iterations: 0,
         }
     }
     pub fn num_indexers(&self) -> usize {

Original file line number	Diff line number	Diff line change
`@@ -576,7 +576,7 @@ fn add_shard_to_indexer(`
`576`	`576`	`}`
`577`	`577`	`}`
`578`	`578`
`579`		`-// If the total node capacities is lower than 110% of the problem load, this`
	`579`	`+// If the total node capacities is lower than 120% of the problem load, this`
`580`	`580`	`// function scales the load of the indexer to reach this limit.`
`581`	`581`	`fn inflate_node_capacities_if_necessary(problem: &mut SchedulingProblem) {`
`582`	`582`	`// First we scale the problem to the point where any indexer can fit the largest shard.`
Original file line number	Diff line number	Diff line change
`@@ -232,12 +232,15 @@ impl IndexerAssignment {`
`232`	`232`	`#[derive(Clone, Debug, Eq, PartialEq)]`
`233`	`233`	`pub struct SchedulingSolution {`
`234`	`234`	`pub indexer_assignments: Vec<IndexerAssignment>,`
	`235`	`+ // used for tests`
	`236`	`+ pub capacity_scaling_iterations: usize,`
`235`	`237`	`}`
`236`	`238`
`237`	`239`	`impl SchedulingSolution {`
`238`	`240`	`pub fn with_num_indexers(num_indexers: usize) -> SchedulingSolution {`
`239`	`241`	`SchedulingSolution {`
`240`	`242`	`indexer_assignments: (0..num_indexers).map(IndexerAssignment::new).collect(),`
	`243`	`+ capacity_scaling_iterations: 0,`
`241`	`244`	`}`
`242`	`245`	`}`
`243`	`246`	`pub fn num_indexers(&self) -> usize {`