Skip to content

Commit fe56a4f

Browse files
[COST-5990] Create new cost model for vm count metric (#5542)
* [COST-5990] add monthly cost model metric rates * Add per hour metrics to the api * updates for vm count metrics * remove vm core count metrics * clean up * add vm per hour cost * Restructure monthly to have mapping * get vm hourly usage from pod usage via trino * linting update * clean up unit test * Stub out unittests * Remove unrelated usage information for monthly --------- Co-authored-by: myersCody <cmyers@redhat.com>
1 parent 5fcd508 commit fe56a4f

File tree

8 files changed

+378
-5
lines changed

8 files changed

+378
-5
lines changed

dev/scripts/cost_models/openshift_on_prem_cost_model.json

+28
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,34 @@
213213
]
214214
},
215215
"cost_type": "Infrastructure"
216+
},
217+
{
218+
"metric": {
219+
"name": "vm_cost_per_month"
220+
},
221+
"tiered_rates": [
222+
{
223+
"unit": "USD",
224+
"value": 0.5,
225+
"usage_start": null,
226+
"usage_end": null
227+
}
228+
],
229+
"cost_type": "Infrastructure"
230+
},
231+
{
232+
"metric": {
233+
"name": "vm_cost_per_hour"
234+
},
235+
"tiered_rates": [
236+
{
237+
"unit": "USD",
238+
"value": 0.007,
239+
"usage_start": null,
240+
"usage_end": null
241+
}
242+
],
243+
"cost_type": "Supplementary"
216244
}
217245
]
218246
}

koku/api/metrics/constants.py

+22
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
OCP_CLUSTER_MONTH = "cluster_cost_per_month"
2121
OCP_CLUSTER_CORE_HOUR = "cluster_core_cost_per_hour"
2222
OCP_PVC_MONTH = "pvc_cost_per_month"
23+
OCP_VM_MONTH = "vm_cost_per_month"
24+
OCP_VM_HOUR = "vm_cost_per_hour"
2325

2426
# defines the usage type for each metric
2527
CPU = "cpu"
@@ -58,6 +60,8 @@
5860
(OCP_CLUSTER_MONTH, OCP_CLUSTER_MONTH),
5961
(OCP_CLUSTER_CORE_HOUR, OCP_CLUSTER_CORE_HOUR),
6062
(OCP_PVC_MONTH, OCP_PVC_MONTH),
63+
(OCP_VM_MONTH, OCP_VM_MONTH),
64+
(OCP_VM_HOUR, OCP_VM_HOUR),
6165
)
6266

6367
COST_TYPE_CHOICES = (
@@ -75,6 +79,7 @@
7579
OCP_METRIC_STORAGE_GB_USAGE_MONTH,
7680
OCP_METRIC_STORAGE_GB_REQUEST_MONTH,
7781
OCP_NODE_CORE_HOUR,
82+
OCP_VM_HOUR,
7883
OCP_CLUSTER_CORE_HOUR,
7984
)
8085

@@ -83,6 +88,7 @@
8388
OCP_NODE_CORE_MONTH,
8489
OCP_CLUSTER_MONTH,
8590
OCP_PVC_MONTH,
91+
OCP_VM_MONTH,
8692
)
8793

8894
DISTRIBUTION_CHOICES = ((MEM, MEM), (CPU, CPU))
@@ -200,6 +206,22 @@
200206
"label_measurement_unit": "pvc-month",
201207
"default_cost_type": "Infrastructure",
202208
},
209+
{
210+
"source_type": "OCP",
211+
"metric": "vm_cost_per_month",
212+
"label_metric": "Virtual Machine",
213+
"label_measurement": "Count",
214+
"label_measurement_unit": "vm-month",
215+
"default_cost_type": "Infrastructure",
216+
},
217+
{
218+
"source_type": "OCP",
219+
"metric": "vm_cost_per_hour",
220+
"label_metric": "Virtual Machine",
221+
"label_measurement": "Count",
222+
"label_measurement_unit": "vm-hour",
223+
"default_cost_type": "Infrastructure",
224+
},
203225
{
204226
"source_type": "OCP",
205227
"metric": "cluster_core_cost_per_hour",

koku/masu/database/ocp_report_db_accessor.py

+79-4
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,18 @@ def populate_monthly_cost_sql(self, cost_type, rate_type, rate, start_date, end_
473473
distribution: Choice of monthly distribution ex. memory
474474
provider_uuid (str): The str of the provider UUID
475475
"""
476+
cost_type_file_mapping = {
477+
"Node": "monthly_cost_cluster_and_node.sql",
478+
"Node_Core_Month": "monthly_cost_cluster_and_node.sql",
479+
"Cluster": "monthly_cost_cluster_and_node.sql",
480+
"PVC": "monthly_cost_persistentvolumeclaim.sql",
481+
"OCP_VM": "monthly_cost_virtual_machine.sql",
482+
}
483+
cost_type_file = cost_type_file_mapping.get(cost_type)
484+
if not cost_type_file:
485+
LOG.error(f"Invalid cost_type: {cost_type} for OCP provider. Skipping populate_monthly_cost_sql update")
486+
return
487+
476488
table_name = self._table_map["line_item_daily_summary"]
477489
report_period = self.report_periods_for_provider_uuid(provider_uuid, start_date)
478490
ctx = {
@@ -503,11 +515,8 @@ def populate_monthly_cost_sql(self, cost_type, rate_type, rate, start_date, end_
503515
)
504516
# We cleared out existing data, but there is no new to calculate.
505517
return
506-
if cost_type in ("Node", "Node_Core_Month", "Cluster"):
507-
sql = pkgutil.get_data("masu.database", "sql/openshift/cost_model/monthly_cost_cluster_and_node.sql")
508-
elif cost_type == "PVC":
509-
sql = pkgutil.get_data("masu.database", "sql/openshift/cost_model/monthly_cost_persistentvolumeclaim.sql")
510518

519+
sql = pkgutil.get_data("masu.database", f"sql/openshift/cost_model/{cost_type_file}")
511520
sql = sql.decode("utf-8")
512521
sql_params = {
513522
"start_date": start_date,
@@ -642,9 +651,75 @@ def populate_usage_costs(self, rate_type, rates, start_date, end_date, provider_
642651
"volume_request_rate": rates.get(metric_constants.OCP_METRIC_STORAGE_GB_REQUEST_MONTH, 0),
643652
"rate_type": rate_type,
644653
}
654+
645655
LOG.info(log_json(msg=f"populating {rate_type} usage costs", context=ctx))
646656
self._prepare_and_execute_raw_sql_query(table_name, sql, sql_params, operation="INSERT")
647657

658+
if ocp_vm_hour_rate := rates.get(metric_constants.OCP_VM_HOUR):
659+
self.populate_vm_hourly_usage_costs(
660+
rate_type, ocp_vm_hour_rate, start_date, end_date, provider_uuid, report_period_id
661+
)
662+
663+
def populate_vm_hourly_usage_costs(
    self, rate_type, ocp_vm_hour_rate, start_date, end_date, provider_uuid, report_period_id
):
    """Refresh the hourly virtual machine cost rows for a single rate type.

    Works in two steps:

    1. Delete rows from the ``line_item_daily_summary`` table that fall inside
       the date window, belong to the given report period, rate type and
       source, have no ``monthly_cost_type`` and carry the
       ``vm_kubevirt_io_name`` key in ``all_labels``.
    2. Run the Trino template
       ``trino_sql/openshift/cost_model/hourly_cost_virtual_machine.sql`` with
       the rate bound as ``vm_cost_per_hour``.

    Args:
        rate_type: Value matched against / written to ``cost_model_rate_type``.
        ocp_vm_hour_rate: Rate handed to the template as ``vm_cost_per_hour``.
        start_date: Start of the window; also used to derive the ``year`` and
            ``month`` template parameters.  Presumably a date or ISO date
            string (it is passed through ``DateHelper().parse_to_date``) -
            confirm against callers.
        end_date: End of the window (inclusive in the DELETE statement).
        provider_uuid: Source identifier; stringified before it is bound.
        report_period_id: Report period whose rows are replaced.
    """
    source_uuid = str(provider_uuid)
    log_context = {
        "schema": self.schema,
        "provider_uuid": source_uuid,
        "start_date": start_date,
        "end_date": end_date,
        "report_period": report_period_id,
    }
    summary_table = OCP_REPORT_TABLE_MAP["line_item_daily_summary"]

    # Step 1: drop the rows this method is about to regenerate.
    LOG.info(
        log_json(
            msg=f"removing virtual machine cost model {rate_type} hourly costs from daily summary",
            context=log_context,
        )
    )
    delete_sql = """
        DELETE FROM {{schema | sqlsafe}}.{{table | sqlsafe}}
        WHERE usage_start >= {{start_date}}
            AND usage_start <= {{end_date}}
            AND report_period_id = {{report_period_id}}
            AND cost_model_rate_type = {{rate_type}}
            AND source_uuid = {{source_uuid}}::uuid
            AND monthly_cost_type IS NULL
            AND all_labels ? 'vm_kubevirt_io_name'
    """
    delete_params = {
        "schema": self.schema,
        "table": summary_table,
        "start_date": start_date,
        "end_date": end_date,
        "report_period_id": report_period_id,
        "rate_type": rate_type,
        "source_uuid": source_uuid,
    }
    self._prepare_and_execute_raw_sql_query(summary_table, delete_sql, delete_params, operation="DELETE")

    # Step 2: rebuild the rows through the Trino template.
    insert_sql = pkgutil.get_data(
        "masu.database", "trino_sql/openshift/cost_model/hourly_cost_virtual_machine.sql"
    ).decode("utf-8")

    # year/month are taken from the start of the window only.
    window_start = DateHelper().parse_to_date(start_date)
    insert_params = {
        "start_date": str(start_date),
        "end_date": str(end_date),
        "schema": self.schema,
        "source_uuid": source_uuid,
        "report_period_id": report_period_id,
        "vm_cost_per_hour": ocp_vm_hour_rate,
        "rate_type": rate_type,
        "year": window_start.strftime("%Y"),
        "month": window_start.strftime("%m"),
    }

    LOG.info(log_json(msg=f"populating virtual machine {rate_type} hourly costs", context=log_context))
    self._execute_trino_multipart_sql_query(insert_sql, bind_params=insert_params)
722+
648723
def populate_tag_usage_costs( # noqa: C901
649724
self, infrastructure_rates, supplementary_rates, start_date, end_date, cluster_id
650725
):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
DELETE FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS lids
2+
WHERE lids.usage_start >= {{start_date}}::date
3+
AND lids.usage_start <= {{end_date}}::date
4+
AND lids.report_period_id = {{report_period_id}}
5+
AND lids.cost_model_rate_type = {{rate_type}}
6+
AND lids.monthly_cost_type = {{cost_type}}
7+
;
8+
9+
INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
10+
uuid,
11+
report_period_id,
12+
cluster_id,
13+
cluster_alias,
14+
data_source,
15+
usage_start,
16+
usage_end,
17+
namespace,
18+
node,
19+
resource_id,
20+
pod_labels,
21+
all_labels,
22+
source_uuid,
23+
cost_model_rate_type,
24+
cost_model_cpu_cost,
25+
cost_model_memory_cost,
26+
cost_model_volume_cost,
27+
monthly_cost_type,
28+
cost_category_id
29+
)
30+
31+
SELECT uuid_generate_v4(),
32+
max(report_period_id) AS report_period_id,
33+
cluster_id,
34+
cluster_alias,
35+
data_source,
36+
usage_start,
37+
usage_end,
38+
namespace,
39+
node,
40+
max(resource_id) AS resource_id,
41+
pod_labels,
42+
all_labels,
43+
source_uuid,
44+
{{rate_type}} AS cost_model_rate_type,
45+
{{rate}}::decimal AS cost_model_cpu_cost,
46+
0 AS cost_model_memory_cost,
47+
0 AS cost_model_volume_cost,
48+
{{cost_type}} AS monthly_cost_type,
49+
cost_category_id
50+
FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS lids
51+
WHERE usage_start >= {{start_date}}::date
52+
AND usage_start <= {{end_date}}::date
53+
AND report_period_id = {{report_period_id}}
54+
AND data_source = 'Pod'
55+
AND all_labels ? 'vm_kubevirt_io_name'
56+
AND pod_request_cpu_core_hours IS NOT NULL
57+
AND pod_request_cpu_core_hours != 0
58+
AND monthly_cost_type IS NULL
59+
GROUP BY usage_start,
60+
usage_end,
61+
source_uuid,
62+
cluster_id,
63+
cluster_alias,
64+
node,
65+
namespace,
66+
data_source,
67+
cost_category_id,
68+
pod_labels,
69+
all_labels
70+
;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
2+
uuid,
3+
report_period_id,
4+
cluster_id,
5+
cluster_alias,
6+
data_source,
7+
usage_start,
8+
usage_end,
9+
namespace,
10+
node,
11+
resource_id,
12+
pod_labels,
13+
all_labels,
14+
source_uuid,
15+
cost_model_rate_type,
16+
cost_model_cpu_cost,
17+
cost_model_memory_cost,
18+
cost_model_volume_cost,
19+
cost_category_id
20+
)
21+
22+
-- get vms from daily table
23+
WITH cte_vms AS (
24+
SELECT
25+
DISTINCT json_extract_scalar(pod_labels, '$.vm_kubevirt_io_name') AS vm_name
26+
FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp
27+
WHERE json_extract_scalar(pod_labels, '$.vm_kubevirt_io_name') IS NOT NULL
28+
AND usage_start >= DATE({{start_date}})
29+
AND usage_start <= DATE({{end_date}})
30+
AND source_uuid = CAST({{source_uuid}} as uuid)
31+
AND pod_request_cpu_core_hours IS NOT NULL
32+
AND pod_request_cpu_core_hours != 0
33+
),
34+
35+
-- get number of hours for every time we see a vm label
36+
cte_vm_usage_hours AS (
37+
SELECT
38+
cte_vms.vm_name,
39+
DATE(interval_start) as interval_day,
40+
count(pod_usage.interval_start) AS vm_interval_hours
41+
FROM hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items pod_usage
42+
INNER JOIN cte_vms
43+
ON json_extract_scalar(pod_usage.pod_labels, '$.vm_kubevirt_io_name') = cte_vms.vm_name
44+
WHERE strpos(lower(pod_labels), 'vm_kubevirt_io_name": "') != 0
45+
AND source = {{source_uuid}}
46+
AND year={{year}}
47+
AND month={{month}}
48+
GROUP BY vm_name, DATE(interval_start)
49+
)
50+
51+
SELECT uuid(),
52+
max(report_period_id) AS report_period_id,
53+
cluster_id,
54+
cluster_alias,
55+
data_source,
56+
usage_start,
57+
usage_end,
58+
namespace,
59+
node,
60+
max(resource_id) AS resource_id,
61+
pod_labels,
62+
all_labels,
63+
source_uuid,
64+
{{rate_type}} AS cost_model_rate_type,
65+
max(vmhrs.vm_interval_hours) * CAST({{vm_cost_per_hour}} as DECIMAL(33, 15)) AS cost_model_cpu_cost,
66+
0 AS cost_model_memory_cost,
67+
0 AS cost_model_volume_cost,
68+
cost_category_id
69+
FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS lids
70+
JOIN cte_vm_usage_hours AS vmhrs
71+
ON json_extract_scalar(pod_labels, '$.vm_kubevirt_io_name') = vmhrs.vm_name
72+
AND vmhrs.interval_day=lids.usage_start
73+
AND vmhrs.interval_day=lids.usage_end
74+
WHERE usage_start >= DATE({{start_date}})
75+
AND usage_start <= DATE({{end_date}})
76+
AND report_period_id = {{report_period_id}}
77+
AND data_source = 'Pod'
78+
AND json_extract_scalar(all_labels, '$.vm_kubevirt_io_name') IS NOT NULL
79+
AND pod_usage_cpu_core_hours IS NOT NULL
80+
AND pod_request_cpu_core_hours IS NOT NULL
81+
AND pod_request_cpu_core_hours != 0
82+
AND monthly_cost_type IS NULL
83+
GROUP BY usage_start,
84+
usage_end,
85+
source_uuid,
86+
cluster_id,
87+
cluster_alias,
88+
node,
89+
namespace,
90+
data_source,
91+
cost_category_id,
92+
pod_labels,
93+
all_labels
94+
;

0 commit comments

Comments
 (0)