@@ -425,6 +425,7 @@ def test_plan_certain_data_shape():
             "retention": "PT8H",
             "require_attached_disks": True,
             "required_zone_size": cluster_capacity.cluster_instance_count.mid,
+            "require_same_instance_family": False,
         },
     )
@@ -435,3 +436,113 @@ def test_plan_certain_data_shape():
     assert lr_clusters[0].count == cluster_capacity.cluster_instance_count.high
     for lr in cap_plan:
         print(lr.candidate_clusters.zonal[0])
+    families = set(
+        map(
+            lambda curr_plan: curr_plan.candidate_clusters.zonal[0].instance.family,
+            cap_plan,
+        )
+    )
+    # check that we did not restrict the instance family to only r7a
+    assert families != {"r7a"}
+
+
+def test_plan_certain_data_shape_same_instance_type():
+    """
+    Use the current clusters' CPU utilization to determine instance types
+    directly, as opposed to extrapolating it from the data shape.
+    """
+    cluster_capacity = CurrentZoneClusterCapacity(
+        cluster_instance_name="r7a.4xlarge",
+        cluster_drive=Drive(
+            name="gp3",
+            drive_type=DriveType.attached_ssd,
+            size_gib=5000,
+            block_size_kib=16,
+        ),
+        cluster_instance_count=Interval(low=15, mid=15, high=15, confidence=1),
+        cpu_utilization=Interval(
+            low=5.441147804260254,
+            mid=13.548842955300195,
+            high=25.11203956604004,
+            confidence=1,
+        ),
+        memory_utilization_gib=Interval(low=0, mid=0, high=0, confidence=1),
+        network_utilization_mbps=Interval(
+            low=4580.919447446355,
+            mid=19451.59814477331,
+            high=42963.441154527085,
+            confidence=1,
+        ),
+        disk_utilization_gib=Interval(
+            low=1341.579345703125,
+            mid=1940.8741284013684,
+            high=2437.607421875,
+            confidence=1,
+        ),
+    )
+
+    desires = CapacityDesires(
+        service_tier=1,
+        current_clusters=CurrentClusters(zonal=[cluster_capacity]),
+        query_pattern=QueryPattern(
+            access_pattern=AccessPattern(AccessPattern.latency),
+            # 2 consumers
+            estimated_read_per_second=Interval(low=2, mid=2, high=4, confidence=1),
+            # 1 producer
+            estimated_write_per_second=Interval(low=1, mid=1, high=1, confidence=0.98),
+            estimated_mean_read_latency_ms=Interval(low=1, mid=1, high=1, confidence=1),
+            estimated_mean_write_latency_ms=Interval(
+                low=1, mid=1, high=1, confidence=1
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=1024, mid=1024, high=1024, confidence=1
+            ),
+            estimated_mean_write_size_bytes=Interval(
+                low=125000000, mid=579000000, high=1351000000, confidence=0.98
+            ),
+            estimated_read_parallelism=Interval(low=1, mid=1, high=1, confidence=1),
+            estimated_write_parallelism=Interval(low=1, mid=1, high=1, confidence=1),
+            read_latency_slo_ms=FixedInterval(low=0.4, mid=4, high=10, confidence=0.98),
+            write_latency_slo_ms=FixedInterval(
+                low=0.4, mid=4, high=10, confidence=0.98
+            ),
+        ),
+        data_shape=DataShape(
+            estimated_state_size_gib=Interval(
+                low=44000, mid=86000, high=91000, confidence=1
+            ),
+        ),
+    )
+
+    cap_plan = planner.plan_certain(
+        model_name="org.netflix.kafka",
+        region="us-east-1",
+        num_results=3,
+        num_regions=4,
+        desires=desires,
+        extra_model_arguments={
+            "cluster_type": ClusterType.ha,
+            "retention": "PT8H",
+            "require_attached_disks": True,
+            "required_zone_size": cluster_capacity.cluster_instance_count.mid,
+            "require_same_instance_family": True,
+        },
+    )
+
+    assert len(cap_plan) >= 1
+    lr_clusters = cap_plan[0].candidate_clusters.zonal
+    assert len(lr_clusters) >= 1
+    print(lr_clusters[0].instance.name)
+    assert lr_clusters[0].count == cluster_capacity.cluster_instance_count.high
+
+    families = set(
+        map(
+            lambda curr_plan: curr_plan.candidate_clusters.zonal[0].instance.family,
+            cap_plan,
+        )
+    )
+    # check that we restricted the instance family to only r7a
+    assert families == {"r7a"}
+
+    for lr in cap_plan:
+        print(lr.candidate_clusters.zonal[0])