
Commit d1f228f

Authored by ayushisingh29 and ayushis
common zonal requirement for a cluster using current capacity and buffers (#127)
* common zonal requirement for a cluster using current capacity and buffers
* Only take action if new utilization is greater than current allocation for scale intent
* tests for cpu, disk scale and preserve added
* Cassandra integration with common buffers
* review comments addressed

Co-authored-by: ayushis <ayushis@netflix.com>
1 parent 7f2ba95 commit d1f228f

File tree

4 files changed: +405 −62 lines


service_capacity_modeling/models/common.py

+198 lines
@@ -12,13 +12,16 @@
 from service_capacity_modeling.interface import AVG_ITEM_SIZE_BYTES
 from service_capacity_modeling.interface import Buffer
 from service_capacity_modeling.interface import BufferComponent
+from service_capacity_modeling.interface import BufferIntent
 from service_capacity_modeling.interface import Buffers
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import CapacityPlan
+from service_capacity_modeling.interface import CapacityRequirement
 from service_capacity_modeling.interface import certain_float
 from service_capacity_modeling.interface import certain_int
 from service_capacity_modeling.interface import Clusters
 from service_capacity_modeling.interface import CurrentClusterCapacity
+from service_capacity_modeling.interface import CurrentClusters
 from service_capacity_modeling.interface import default_reference_shape
 from service_capacity_modeling.interface import Drive
 from service_capacity_modeling.interface import Instance
@@ -689,3 +692,198 @@ def merge_plan(
     return CapacityPlan(
         requirements=merged_requirements, candidate_clusters=merged_clusters
     )
+
+
+def derived_buffer_for_component(buffer: Dict[str, Buffer], components: List[str]):
+    scale = 0.0
+    preserve = False
+
+    if not buffer:
+        return scale, preserve
+
+    for bfr in buffer.values():
+        if any(component in components for component in bfr.components):
+            if bfr.intent == BufferIntent.scale:
+                scale = max(scale, bfr.ratio)
+            if bfr.intent == BufferIntent.preserve:
+                preserve = True
+
+    return scale, preserve
+
+
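To make the scale/preserve precedence concrete, here is a minimal usage sketch (not part of the diff): the buffer keys and ratios are invented, and it assumes Buffer accepts the intent, ratio, and components fields that derived_buffer_for_component reads above.

from service_capacity_modeling.interface import Buffer, BufferIntent

# Hypothetical derived buffers keyed by name; only intent/ratio/components matter here.
derived = {
    "compute-scale": Buffer(intent=BufferIntent.scale, ratio=1.5, components=["compute"]),
    "disk-preserve": Buffer(intent=BufferIntent.preserve, ratio=1.0, components=["disk"]),
}

# The scale buffer matches the compute/cpu components, so scale is returned.
assert derived_buffer_for_component(derived, ["compute", "cpu"]) == (1.5, False)
# Only the preserve buffer matches storage/disk.
assert derived_buffer_for_component(derived, ["storage", "disk"]) == (0.0, True)
# No derived buffers at all -> neither scale nor preserve.
assert derived_buffer_for_component({}, ["compute"]) == (0.0, False)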
+def get_cores_from_current_capacity(
+    current_capacity: CurrentClusterCapacity, buffers: Buffers, instance: Instance
+):
+    # compute cores required per zone
+    cpu_success_buffer = (1 - cpu_headroom_target(instance, buffers)) * 100
+    current_cpu_utilization = current_capacity.cpu_utilization.mid
+
+    if current_capacity.cluster_instance is None:
+        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
+    else:
+        cluster_instance = current_capacity.cluster_instance
+
+    current_cores = cluster_instance.cpu * current_capacity.cluster_instance_count.mid
+
+    scale, preserve = derived_buffer_for_component(buffers.derived, ["compute", "cpu"])
+    # Scale and preserve for the same component should not be passed together.
+    # If user passes it, then scale will be preferred over preserve.
+    if scale > 0:
+        # if the new cpu core is less than the current,
+        # then take no action and return the current cpu cores
+        new_cpu_utilization = current_cpu_utilization * scale
+        core_scale_up_factor = max(1.0, new_cpu_utilization / cpu_success_buffer)
+        return math.ceil(current_cores * core_scale_up_factor)
+
+    if preserve:
+        return current_cores
+
+    return int(current_cores * (current_cpu_utilization / cpu_success_buffer))
+
+
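As a worked illustration of the core math above (the instance counts, utilization, and headroom figures are invented, not from the commit):

import math

# A zone running 16 x 8-vCPU instances (128 cores) at 40% CPU, with a CPU
# success buffer of 60%, i.e. (1 - headroom_target) * 100.
current_cores = 16 * 8
current_cpu_utilization = 40.0
cpu_success_buffer = 60.0

# Derived scale buffer of 1.5x: projected utilization (60%) does not exceed
# the success buffer, so the factor clamps to 1.0 and the zone keeps 128 cores.
factor = max(1.0, (current_cpu_utilization * 1.5) / cpu_success_buffer)
assert math.ceil(current_cores * factor) == 128

# Derived scale buffer of 2.0x: projected utilization (80%) exceeds the
# success buffer, so the zone grows to ceil(128 * 80 / 60) == 171 cores.
factor = max(1.0, (current_cpu_utilization * 2.0) / cpu_success_buffer)
assert math.ceil(current_cores * factor) == 171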
+def get_memory_from_current_capacity(
+    current_capacity: CurrentClusterCapacity, buffers: Buffers
+):
+    # compute memory required per zone
+    current_memory_utilization = current_capacity.memory_utilization_gib.mid
+
+    if current_capacity.cluster_instance is None:
+        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
+    else:
+        cluster_instance = current_capacity.cluster_instance
+
+    zonal_ram_allocated = (
+        cluster_instance.ram_gib * current_capacity.cluster_instance_count.mid
+    )
+
+    # These are the desired buffers
+    memory_buffer = buffer_for_components(
+        buffers=buffers, components=[BufferComponent.memory]
+    )
+
+    scale, preserve = derived_buffer_for_component(
+        buffers.derived, ["memory", "storage"]
+    )
+    # Scale and preserve for the same component should not be passed together.
+    # If user passes it, then scale will be preferred over preserve.
+    if scale > 0:
+        # if the new required memory is less than the current,
+        # then take no action and return the current ram
+        return max(
+            current_memory_utilization * scale * memory_buffer.ratio,
+            zonal_ram_allocated,
+        )
+
+    if preserve:
+        return zonal_ram_allocated
+
+    return current_memory_utilization * memory_buffer.ratio
+
+
+def get_network_from_current_capacity(
+    current_capacity: CurrentClusterCapacity, buffers: Buffers
+):
+    # compute network required per zone
+    current_network_utilization = current_capacity.network_utilization_mbps.mid
+
+    if current_capacity.cluster_instance is None:
+        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
+    else:
+        cluster_instance = current_capacity.cluster_instance
+
+    zonal_network_allocated = (
+        cluster_instance.net_mbps * current_capacity.cluster_instance_count.mid
+    )
+
+    # These are the desired buffers
+    network_buffer = buffer_for_components(
+        buffers=buffers, components=[BufferComponent.network]
+    )
+
+    scale, preserve = derived_buffer_for_component(
+        buffers.derived, ["compute", "network"]
+    )
+    # Scale and preserve for the same component should not be passed together.
+    # If user passes it, then scale will be preferred over preserve.
+    if scale > 0:
+        # if the new required network is less than the current,
+        # then take no action and return the current bandwidth
+        return max(
+            current_network_utilization * scale * network_buffer.ratio,
+            zonal_network_allocated,
+        )
+
+    if preserve:
+        return zonal_network_allocated
+
+    return current_network_utilization * network_buffer.ratio
+
+
+def get_disk_from_current_capacity(
+    current_capacity: CurrentClusterCapacity, buffers: Buffers
+):
+    # compute disk required per zone
+    current_disk_utilization = current_capacity.disk_utilization_gib.mid
+
+    if current_capacity.cluster_instance is None:
+        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
+    else:
+        cluster_instance = current_capacity.cluster_instance
+
+    assert cluster_instance.drive is not None, "Drive should not be None"
+
+    zonal_disk_allocated = (
+        cluster_instance.drive.max_size_gib
+        * current_capacity.cluster_instance_count.mid
+    )
+
+    # These are the desired buffers
+    disk_buffer = buffer_for_components(
+        buffers=buffers, components=[BufferComponent.disk]
+    )
+
+    scale, preserve = derived_buffer_for_component(buffers.derived, ["storage", "disk"])
+    # Scale and preserve for the same component should not be passed together.
+    # If user passes it, then scale will be preferred over preserve.
+    if scale > 0:
+        # if the new required disk is less than the current,
+        # then take no action and return the current disk
+        return max(
+            current_disk_utilization * scale * disk_buffer.ratio, zonal_disk_allocated
+        )
+    if preserve:
+        # preserve the current disk size for the zone
+        return zonal_disk_allocated
+
+    return current_disk_utilization * disk_buffer.ratio
+
+
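The max() guard in the scale branches above is what enforces the commit's "only take action if new utilization is greater than current allocation" rule; a small numeric illustration for disk (all values invented):

# 3 instances with 1 TiB of max local disk each -> 3072 GiB allocated per zone,
# currently holding 800 GiB of data, desired disk buffer ratio 1.25, derived scale 2.0.
zonal_disk_allocated = 3 * 1024
current_disk_utilization = 800.0
scaled_requirement = current_disk_utilization * 2.0 * 1.25  # 2000 GiB

# The scaled requirement is still below what the zone already has, so the
# guard returns the existing allocation instead of shrinking the cluster.
assert max(scaled_requirement, zonal_disk_allocated) == 3072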
+def zonal_requirements_from_current(
+    current_cluster: CurrentClusters,
+    buffers: Buffers,
+    instance: Instance,
+    reference_shape: Instance,
+) -> CapacityRequirement:
+    if current_cluster is not None and current_cluster.zonal[0] is not None:
+        current_capacity: CurrentClusterCapacity = current_cluster.zonal[0]
+        needed_cores = normalize_cores(
+            get_cores_from_current_capacity(current_capacity, buffers, instance),
+            instance,
+            reference_shape,
+        )
+        needed_network_mbps = get_network_from_current_capacity(
+            current_capacity, buffers
+        )
+        needed_memory_gib = get_memory_from_current_capacity(current_capacity, buffers)
+        needed_disk_gib = get_disk_from_current_capacity(current_capacity, buffers)
+
+        return CapacityRequirement(
+            requirement_type="zonal-capacity",
+            cpu_cores=certain_int(needed_cores),
+            mem_gib=certain_float(needed_memory_gib),
+            disk_gib=certain_float(needed_disk_gib),
+            network_mbps=certain_float(needed_network_mbps),
+            reference_shape=current_capacity.cluster_instance,
+        )
+    else:
+        raise ValueError("Please check if current_cluster is populated correctly.")
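For orientation, a rough sketch (not from the commit) of how a model might feed the new entry point. The instance names and utilization numbers are invented, it assumes the remaining CurrentClusterCapacity and Buffers fields are default-constructible, and the import path for the `shapes` hardware catalogue used by common.py is an assumption.

from service_capacity_modeling.hardware import shapes  # assumed catalogue import
from service_capacity_modeling.interface import (
    Buffers,
    CurrentClusterCapacity,
    CurrentClusters,
    certain_float,
    certain_int,
    default_reference_shape,
)
from service_capacity_modeling.models.common import zonal_requirements_from_current

# One zone of the existing cluster, described by the fields the functions above
# read; a shape with local disk is assumed so the disk assert passes.
current_zone = CurrentClusterCapacity(
    cluster_instance_name="i3en.2xlarge",
    cluster_instance=shapes.instance("i3en.2xlarge"),
    cluster_instance_count=certain_int(8),
    cpu_utilization=certain_float(45.0),
    memory_utilization_gib=certain_float(120.0),
    network_utilization_mbps=certain_float(900.0),
    disk_utilization_gib=certain_float(2000.0),
)

requirement = zonal_requirements_from_current(
    current_cluster=CurrentClusters(zonal=[current_zone] * 3),
    buffers=Buffers(),  # no desired or derived buffer overrides
    instance=shapes.instance("i3en.4xlarge"),
    reference_shape=default_reference_shape,
)
print(requirement.cpu_cores, requirement.mem_gib, requirement.disk_gib)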
