Skip to content

Commit 780daec

Browse files
committed
test: check consumed watts of replay usage profile
1 parent 71f0850 commit 780daec

File tree

6 files changed

+262
-14
lines changed

6 files changed

+262
-14
lines changed

flake.nix

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
cppMesonDevBase = nur-kapack.lib.${system}.cppMesonDevBase;
4949
pytest = pkgs.python3Packages.pytest;
5050
pytest-html = pkgs.python3Packages.pytest-html;
51+
pandas = pkgs.python3Packages.pandas;
5152
};
5253
callPackage = mergedPkgs: deriv-func: attrset: options: pkgs.lib.callPackageWith(mergedPkgs // options) deriv-func attrset;
5354
in rec {

nix/edc-test.nix

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{ stdenv, lib
2-
, batsim, batsim-edc-libs, pytest, pytest-html, batsim-internal-test
2+
, batsim, batsim-edc-libs, batsim-internal-test
3+
, pytest, pytest-html, pandas
34
, doCoverage ? false
45
, failOnTestFailed ? true
56
, startFromInternalCoverage ? false
@@ -16,6 +17,7 @@ stdenv.mkDerivation rec {
1617
batsim-edc-libs
1718
pytest
1819
pytest-html
20+
pandas
1921
] ++ lib.optional startFromInternalCoverage [ batsim-internal-test ];
2022

2123
src = lib.sourceByRegex ../. [

test/helper.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
WORKLOAD_DIR = os.environ['WORKLOAD_DIR']
88
EDC_DIR = os.environ['EDC_LD_LIBRARY_PATH']
99

10-
def prepare_instance(name: str, test_root_dir: str, platform: str, edc: str, workload: str=None, edc_init_content: str='', use_json: bool=False):
10+
def prepare_instance(name: str, test_root_dir: str, platform: str, edc: str, workload: str=None, edc_init_content: str='', use_json: bool=False, batsim_extra_args: list[str]=None):
1111
output_dir = f'{test_root_dir}/{name}'
1212
os.makedirs(output_dir, exist_ok=True)
1313

@@ -27,6 +27,9 @@ def prepare_instance(name: str, test_root_dir: str, platform: str, edc: str, wor
2727
'--workload', f'{WORKLOAD_DIR}/{workload}.json'
2828
])
2929

30+
if batsim_extra_args is not None:
31+
batsim_cmd += batsim_extra_args
32+
3033
batsim_cmd_filename = f'{output_dir}/batsim.sh'
3134
descriptor = os.open(path=batsim_cmd_filename, flags=os.O_WRONLY|os.O_CREAT|os.O_TRUNC, mode=0o700)
3235
with open(descriptor, 'w') as f:

test/test_profile.py

-12
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,3 @@ def test_replay_smpi(test_root_dir, smpi_workload_timeoutscale):
4242
batcmd, outdir = prepare_instance(instance_name, test_root_dir, platform, 'exec1by1', smpi_workload)
4343
p = run_batsim(batcmd, outdir, timeout=timeout)
4444
assert p.returncode == 0
45-
46-
def test_replay_usage(test_root_dir):
47-
platform = 'small_platform'
48-
workload = 'test_usage_trace'
49-
func_name = inspect.currentframe().f_code.co_name.replace('test_', '', 1)
50-
wload_name = workload.replace('test_', '', 1)
51-
instance_name = f'{MOD_NAME}-{func_name}-{wload_name}'
52-
53-
timeout = int(os.getenv('TEST_INSTANCE_TIMEOUT', '5')) * 3
54-
batcmd, outdir = prepare_instance(instance_name, test_root_dir, platform, 'exec1by1', workload)
55-
p = run_batsim(batcmd, outdir, timeout=timeout)
56-
assert p.returncode == 0

test/test_profile_replay_usage.py

+254
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
#!/usr/bin/env python3
2+
'''Usage trace tests.
3+
4+
These tests check that the energy consumption of usage trace profiles is the expected one.
5+
'''
6+
import inspect
7+
import pandas as pd
8+
import pytest
9+
from helper import *
10+
11+
# Module name without its first 'test_' occurrence; used as the prefix of
# the simulation instance names built by the tests of this module.
MOD_NAME = __name__.replace('test_', '', 1)

# Parameters of the 'fast' machine type.
# - speed: divides an amount of flops to obtain a duration
# - widle: watts used while the machine is idle
# - wmin / wmax: watts used at usage 0 / usage 1 (linear in between)
# NOTE(review): presumably these mirror the hosts declared in the simulated
# platform file — confirm against the platform used by the tests.
fast_speed = 100
fast_widle = 50
fast_wmin = 100
fast_wmax = 200

# Parameters of the 'slow' machine type (same meaning as above).
slow_speed = 50
slow_widle = 50
slow_wmin = 90
slow_wmax = 150
22+
23+
def joule_prediction(time, wattmin, wattmax, usage):
    '''Return the energy consumed during `time` at a constant `usage`.

    The power is linearly interpolated between `wattmin` (reached when
    `usage` is 0) and `wattmax` (reached when `usage` is 1), then multiplied
    by `time`.

    Passing the same value as `wattmin` and `wattmax` yields a constant
    power whatever `usage` is.
    '''
    watts = wattmin + (wattmax - wattmin) * usage
    return time * watts
25+
26+
def check_ok_bool(row):
    '''Tell whether a job row matches its expected time and energy.

    `row` must support item access for the keys 'execution_time',
    'expected_execution_time', 'consumed_energy' and
    'expected_consumed_energy'.

    Values are compared after conversion with `int()`, i.e. truncated
    toward zero: fractional differences are ignored as long as both values
    share the same integer part.

    Returns True when both the execution time and the consumed energy match,
    False otherwise.
    '''
    # The energy columns are only read when the execution times match.
    if int(row['execution_time']) != int(row['expected_execution_time']):
        return False

    return int(row['consumed_energy']) == int(row['expected_consumed_energy'])
34+
35+
def check_ok(row):
    '''Integer flavour of `check_ok_bool`.

    Returns 1 when `row` matches its expectations and 0 otherwise, so that
    the results can be summed to count the valid rows.
    '''
    return int(check_ok_bool(row))
37+
38+
def estimate_job_from_real_trace():
    '''Estimate the execution time and energy of the real-trace job.

    Each trace entry is a `(machine_id, usage, flops)` phase. Phases are
    assumed to run one after the other on their machine: the time spent on a
    machine is the sum of `flops / speed` over its phases, and the energy of
    a phase is its duration times the power interpolated between `wmin` and
    `wmax` at the phase usage.

    The job lasts as long as its busiest machine. The other machines are
    charged `widle` watts for the time they wait until that point.

    Returns a `[job_id, execution_time, consumed_energy]` list whose job
    identifier is the string '60'.
    '''
    # (machine_id, usage, flops); order is kept as-is since it drives the
    # floating-point summation order below.
    traces = [
        (0, 0.86, 1186),
        (0, 0.64, 469),
        (0, 0.79, 456),
        (0, 0.84, 4643),
        (0, 0.85, 4659),
        (0, 0.9, 1000),
        (0, 0.83, 3614),
        (0, 0.84, 4643),
        (0, 0.9, 933),
        (0, 0.83, 3759),
        (0, 0.89, 1011),
        (0, 0.83, 3614),
        (0, 0.84, 4571),
        (0, 0.91, 923),
        (0, 0.84, 3643),
        (0, 0.89, 1079),
        (0, 0.83, 3614),
        (0, 0.85, 4588),
        (0, 0.85, 4588),
        (0, 0.91, 989),
        (0, 0.84, 3643),
        (0, 0.85, 4659),
        (0, 0.84, 4643),
        (0, 0.88, 3068),
        (0, 0.79, 1519),
        (0, 0.89, 674),

        (1, 0.98, 1041),
        (1, 0.73, 411),
        (1, 0.88, 409),
        (1, 0.98, 2755),
        (1, 0.88, 1364),
        (1, 1.0, 1020),
        (1, 0.94, 3383),
        (1, 0.99, 2424),
        (1, 0.88, 1432),
        (1, 0.98, 2694),
        (1, 0.89, 1416),
        (1, 1, 840),
        (1, 0.97, 2041),
        (1, 0.89, 1281),
        (1, 0.99, 2667),
        (1, 0.89, 1416),
        (1, 0.99, 2667),
        (1, 0.89, 1348),
        (1, 1, 840),
        (1, 0.98, 1898),
        (1, 0.89, 1348),
        (1, 0.98, 2816),
        (1, 0.88, 1364),
        (1, 0.97, 18990),
        (1, 0.89, 1348),
        (1, 0.99, 2727),
        (1, 0.89, 1348),
        (1, 1.0, 600),

        (2, 0.84, 1429),
        (2, 0.64, 469),
        (2, 0.86, 3837),
        (2, 0.75, 960),
        (2, 0.9, 667),
        (2, 0.86, 3070),
        (2, 0.76, 947),
        (2, 0.86, 3698),
        (2, 0.76, 947),
        (2, 0.86, 3767),
        (2, 0.76, 868),
        (2, 0.86, 3837),
        (2, 0.75, 880),
        (2, 0.86, 3698),
        (2, 0.75, 960),
        (2, 0.86, 3628),
        (2, 0.75, 960),
        (2, 0.86, 3698),
        (2, 0.75, 960),
        (2, 0.86, 3767),
        (2, 0.75, 960),
        (2, 0.85, 3741),
        (2, 0.75, 960),
        (2, 0.86, 3698),
        (2, 0.76, 947),
        (2, 0.87, 3724),
        (2, 0.75, 960),
        (2, 0.87, 3655),
        (2, 0.75, 960),
        (2, 0.91, 593),
        (2, 0.85, 3176),
        (2, 0.75, 960),
        (2, 0.85, 3741),
        (2, 0.76, 947),
        (2, 0.88, 682),

        (3, 0.87, 1379),
        (3, 0.66, 455),
        (3, 0.89, 3708),
        (3, 0.77, 935),
        (3, 0.9, 3600),
        (3, 0.78, 923),
        (3, 0.89, 3573),
        (3, 0.78, 923),
        (3, 0.88, 3750),
        (3, 0.77, 779),
        (3, 0.89, 3708),
        (3, 0.78, 846),
        (3, 0.89, 3573),
        (3, 0.77, 935),
        (3, 0.89, 3506),
        (3, 0.77, 935),
        (3, 0.9, 3533),
        (3, 0.77, 935),
        (3, 0.89, 3640),
        (3, 0.77, 935),
        (3, 0.89, 3573),
        (3, 0.77, 935),
        (3, 0.91, 3495),
        (3, 0.78, 923),
        (3, 0.9, 3600),
        (3, 0.78, 923),
        (3, 0.9, 3533),
        (3, 0.78, 923),
        (3, 0.9, 3600),
        (3, 0.78, 923),
        (3, 0.89, 3573),
        (3, 0.77, 935),
        (3, 0.91, 659),
    ]
    traces_df = pd.DataFrame(traces, columns=['machine_id', 'usage', 'flops'])

    # job allocation (rank->machine_type)
    machines = [
        (0, 'fast'),
        (1, 'fast'),
        (2, 'slow'),
        (3, 'slow'),
    ]
    machines_df = pd.DataFrame(machines, columns=['machine_id', 'machine_type'])

    # parameters of each type of machine
    machine_types = [
        ('fast', fast_speed, fast_widle, fast_wmin, fast_wmax),
        ('slow', slow_speed, slow_widle, slow_wmin, slow_wmax),
    ]
    machine_types_df = pd.DataFrame(machine_types, columns=['machine_type', 'speed', 'widle', 'wmin', 'wmax'])

    # One row per machine with the parameters of its type. Joined on the
    # shared 'machine_type' column; merge() returns a new frame, so the
    # result can safely be reused for both joins below.
    machine_params_df = pd.merge(machines_df, machine_types_df)

    # Energy spent computing: one row per trace phase.
    df = pd.merge(traces_df, machine_params_df)
    df['duration'] = df['flops'] / df['speed']
    df['w'] = df['wmin'] + (df['wmax'] - df['wmin']) * df['usage']
    df['joules'] = df['w'] * df['duration']

    # Energy spent idling: one row per machine. A machine idles from the
    # end of its own phases until the busiest machine finishes.
    idles = df.groupby(['machine_id'])['duration'].agg('sum').to_frame()
    idles.reset_index(level=0, inplace=True)
    idles['job_duration'] = idles['duration'].max()
    idles['idle_time'] = idles['job_duration'] - idles['duration']
    idles = pd.merge(idles, machine_params_df)
    idles['joules'] = idles['widle'] * idles['idle_time']

    job_execution_time = idles['duration'].max()
    job_joules = idles['joules'].sum() + df['joules'].sum()
    return ['60', job_execution_time, job_joules]
199+
200+
def test_check_energy_consumed(test_root_dir):
    '''Check the execution time and consumed energy of every simulated job.

    Runs Batsim on the 'small_platform_replay_usage' platform with the
    'test_usage_trace' workload, the 'fcfs' EDC and the '--energy-host'
    option, then compares each row of the produced `batout/jobs.csv` with
    the values predicted by this module.

    Raises an Exception when a simulated job has no expectation in this
    test, or when at least one job does not match its expectation.
    '''
    platform = 'small_platform_replay_usage'
    workload = 'test_usage_trace'
    func_name = inspect.currentframe().f_code.co_name.replace('test_', '', 1)
    wload_name = workload.replace('test_', '', 1)
    instance_name = f'{MOD_NAME}-{func_name}-{wload_name}'

    timeout = int(os.getenv('TEST_INSTANCE_TIMEOUT', '5')) * 3
    batcmd, outdir = prepare_instance(instance_name, test_root_dir, platform, 'fcfs', workload, batsim_extra_args=['--energy-host'])
    p = run_batsim(batcmd, outdir, timeout=timeout)
    assert p.returncode == 0

    # analyze Batsim results to check their energy consumption is the expected one.
    batjobs_filename = f'{outdir}/batout/jobs.csv'
    jobs = pd.read_csv(batjobs_filename)
    # Job identifiers are compared as strings, like the expected ones below.
    jobs['job_id'] = jobs['job_id'].astype('string')
    jobs.sort_values(by=['job_id'], inplace=True)

    # Each entry is [job_id, expected execution time, expected consumed energy].
    # NOTE(review): the factor 2 on some entries presumably accounts for jobs
    # running on two hosts — confirm against the workload file.
    expected = [
        ['0', 10, 2*joule_prediction(10, fast_widle, fast_widle, 0.0)],

        ['10', 10, 2*joule_prediction(10, fast_wmin, fast_wmax, 1.0)],
        ['11', 20, 2*joule_prediction(20, slow_wmin, slow_wmax, 1.0)],

        ['20', 10, 2*joule_prediction(10, fast_wmin, fast_wmax, 0.5)],
        ['21', 20, 2*joule_prediction(20, slow_wmin, slow_wmax, 0.5)],

        ['30', 20, 2*(joule_prediction(10, fast_wmin, fast_wmax, 1.0)+joule_prediction(10, fast_wmin, fast_wmax, 0.1))],
        ['31', 40, 2*(joule_prediction(20, slow_wmin, slow_wmax, 1.0)+joule_prediction(20, slow_wmin, slow_wmax, 0.1))],

        ['40', 10, joule_prediction(10, fast_wmin, fast_wmax, 0.2)+joule_prediction(10, fast_wmin, fast_wmax, 0.6)],
        ['41', 20, joule_prediction(20, slow_wmin, slow_wmax, 0.2)+joule_prediction(20, slow_wmin, slow_wmax, 0.6)],

        ['50', 20, joule_prediction(10, fast_wmin, fast_wmax, 0.1)+joule_prediction(10, fast_widle, fast_widle, 0.0) +
                   joule_prediction(10, fast_wmin, fast_wmax, 0.01)+joule_prediction(10, fast_wmin, fast_wmax, 0.97)],
        ['51', 40, joule_prediction(20, slow_wmin, slow_wmax, 0.1)+joule_prediction(20, slow_widle, slow_widle, 0.0) +
                   joule_prediction(20, slow_wmin, slow_wmax, 0.01)+joule_prediction(20, slow_wmin, slow_wmax, 0.97)],

        estimate_job_from_real_trace()
    ]
    expected_df = pd.DataFrame(expected, columns = ['job_id', 'expected_execution_time', 'expected_consumed_energy'])

    # Inner join on the shared 'job_id' column: a simulated job without any
    # expectation disappears from `merged`, which the length check detects.
    merged = pd.merge(jobs, expected_df)
    if len(merged) != len(jobs):
        raise Exception('There are {} jobs in the workload but only {} jobs are known by the test'.format(len(jobs), len(merged)))

    # check_ok yields 1 per valid row, so the sum is the number of valid jobs.
    merged['valid'] = merged.apply(check_ok, axis=1)
    all_valid = merged['valid'].sum() == len(merged)

    # The same report is printed on success and on failure.
    report_columns = ['job_id', 'valid', 'execution_time', 'expected_execution_time', 'consumed_energy', 'expected_consumed_energy']
    print('All jobs are valid!' if all_valid else 'Some jobs are invalid!')
    print(merged[report_columns])

    if not all_valid:
        raise Exception('The execution of some jobs did not match this test expectations.')
254+

0 commit comments

Comments
 (0)