-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #21 from radical-cybertools/seavea-hackathon-2025
SEAVEA Hackathon 2025
- Loading branch information
Showing
24 changed files
with
4,400 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Docker container with RADICAL-Cybertools (RCT) tutorial notebooks | ||
|
||
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/radical-cybertools/tutorials/main) | ||
|
||
> [!IMPORTANT] | ||
> [Binder](https://mybinder.readthedocs.io) lets you run and test our RCT | ||
> notebooks. You should **not** expect Binder to match the performance | ||
> achievable by RCT on local or high performance computing (HPC) platforms. | ||
> If performance is a consideration, please consider executing our RCT Docker | ||
> container locally or in a suitable HPC environment. | ||
## How to run | ||
|
||
The RCT Tutorials container is based on the | ||
[jupyter/minimal-notebook](https://github.com/jupyter/docker-stacks) image. | ||
We prepared a container image with all the latest tutorials in this | ||
repository and uploaded it to | ||
[DockerHub](https://hub.docker.com/u/radicalcybertools) | ||
(`radicalcybertools/tutorials:seavea-hackathon-2025`). The following command | ||
pulls and runs the RCT Tutorials container locally (**NOTE**: `docker` must | ||
be installed locally): | ||
|
||
```shell | ||
docker run --rm -it -p 8888:8888 radicalcybertools/tutorials:seavea-hackathon-2025 | ||
``` | ||
|
||
## Documentation | ||
|
||
* RADICAL-EnTK: https://radicalentk.readthedocs.io/ | ||
* RADICAL-Pilot: https://radicalpilot.readthedocs.io/ | ||
|
212 changes: 212 additions & 0 deletions
212
src/seavea-hackathon-2025/drafts/entk-darshan/darshan-enabled.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "d7060a86-eda9-46d7-a6a7-dc1dd0eeaa46", | ||
"metadata": {}, | ||
"source": [ | ||
"# RADICAL-EnTK with Darshan to generate workflow provenance graph\n", | ||
"\n", | ||
"## Darshan\n", | ||
"\n", | ||
"https://wordpress.cels.anl.gov/darshan/\n", | ||
"\n", | ||
"Darshan is an HPC I/O characterization tool. It is designed to capture an accurate picture of application I/O behavior, including properties such as patterns of access within files, with minimum overhead. Darshan can be used to investigate and tune the I/O behavior of complex HPC applications." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "b0f73a4a-2388-4203-bb5d-efd04aa5ce60", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%capture\n", | ||
"# install darshan or ensure that it is installed\n", | ||
"!./scripts/darshan-install.sh" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "5c6b8ba2-48f8-496e-8f32-bec3cb2c27dc", | ||
"metadata": {}, | ||
"source": [ | ||
"## Example of enabling Darshan\n", | ||
"\n", | ||
"In the example, Darshan is enabled for a specific `re.Task` and for all tasks within a specific `re.Stage`." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "db41fa7d-c485-4b10-bc2e-d90c252a61b3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"\n", | ||
"import radical.entk as re\n", | ||
"import radical.pilot as rp\n", | ||
"\n", | ||
"from radical.entk.tools import (cache_darshan_env,\n", | ||
" with_darshan,\n", | ||
" enable_darshan,\n", | ||
" get_provenance_graph)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "05556167-3618-4476-913a-195e52d2c27d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%env RADICAL_REPORT_ANIME=FALSE\n", | ||
"%env RADICAL_REPORT=TRUE\n", | ||
"%env RADICAL_LOG_LVL=DEBUG" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "bebd98ac-c4eb-42cd-be6c-781cc6d5af91", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%capture\n", | ||
"# prepare darshan environment\n", | ||
"# runtime root dir (if not provided then env variable $DARSHAN_RUNTIME_ROOT should be set)\n", | ||
"# modules (optional)\n", | ||
"# env variables (optional)\n", | ||
"cache_darshan_env(darshan_runtime_root='/usr/local',\n", | ||
" modules=[], env={})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "3def0316-c6b2-414e-8cc3-113e227c9e25", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"TASK_01_OUTPUT = 'output_01.dat'\n", | ||
"\n", | ||
"\n", | ||
"def get_stage_0():\n", | ||
"\n", | ||
" # hello-RP task\n", | ||
" task_00 = re.Task({\n", | ||
" 'executable': 'radical-pilot-hello.sh',\n", | ||
" 'arguments' : [10],\n", | ||
" 'cpu_reqs' : {'cpu_processes' : 1,\n", | ||
" 'cpu_threads' : 4,\n", | ||
" 'cpu_thread_type': rp.OpenMP}\n", | ||
" })\n", | ||
"\n", | ||
" # R/W data\n", | ||
" task_01 = re.Task({\n", | ||
" 'executable' : '/bin/sh',\n", | ||
" 'arguments' : ['-c', f'cat input.dat | wc > {TASK_01_OUTPUT}'],\n", | ||
" 'upload_input_data': ['/etc/passwd > input.dat'],\n", | ||
" 'copy_output_data' : [f'{TASK_01_OUTPUT} > $SHARED/{TASK_01_OUTPUT}']\n", | ||
" })\n", | ||
"\n", | ||
" stage_0 = re.Stage()\n", | ||
" # --- enable Darshan for task \"task_01\" only\n", | ||
" stage_0.add_tasks([task_00, enable_darshan(task_01)])\n", | ||
" return stage_0\n", | ||
"\n", | ||
"\n", | ||
"# --- enable Darshan for the whole \"stage_1\" using decorator\n", | ||
"@with_darshan\n", | ||
"def get_stage_1():\n", | ||
"\n", | ||
" # R/W data and task depends on the task from the previous stage\n", | ||
" task_10 = re.Task({\n", | ||
" 'executable' : '/bin/sh',\n", | ||
" 'arguments' : ['-c',\n", | ||
" f\"sed -r 's/\\s+//g' {TASK_01_OUTPUT} \" + # noqa: W605\n", | ||
" '| grep -o . | sort | uniq -c > output_10.dat'],\n", | ||
" 'copy_input_data': [f'$SHARED/{TASK_01_OUTPUT} > {TASK_01_OUTPUT}']\n", | ||
" })\n", | ||
"\n", | ||
" stage_1 = re.Stage()\n", | ||
" stage_1.add_tasks([task_10])\n", | ||
" return stage_1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e49bc965-f69c-4416-8426-5a1b2c1b96aa", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"amgr = re.AppManager()\n", | ||
"amgr.resource_desc = {'resource': 'local.localhost',\n", | ||
" 'cpus' : 8,\n", | ||
" 'walltime': 15}\n", | ||
"\n", | ||
"pipeline = re.Pipeline()\n", | ||
"pipeline.add_stages([get_stage_0(), get_stage_1()])\n", | ||
"\n", | ||
"amgr.workflow = [pipeline]\n", | ||
"amgr.run()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "abb5ec82-ba30-460b-963f-b5401394f58e", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"sid = amgr.sid\n", | ||
"\n", | ||
"!ls -al ~/radical.pilot.sandbox/\"$sid\"/pilot.0000/task.000001/darshan_logs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "8d41123e-23a6-4057-a737-192cc41fb5ab", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pprint import pformat\n", | ||
"\n", | ||
"print(pformat(get_provenance_graph(pipelines=[pipeline],\n", | ||
" output_file='entk_provenance.json')))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6396c7bf-731c-472d-9276-104d68c7e736", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
37 changes: 37 additions & 0 deletions
37
src/seavea-hackathon-2025/drafts/entk-darshan/scripts/darshan-install.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/bash
#
# Install Darshan (darshan-runtime and darshan-util, non-MPI build) and patch
# RADICAL-EnTK with the Darshan related fixes.
#
# Nothing is done if `darshan-config` is already available in PATH.
# Installing the system packages is best effort (apt-get or sudo might not be
# available), but a failure while fetching, configuring or building Darshan
# stops the script, so that no later step runs in a wrong directory.

DARSHAN_VER=3.4.6
DARSHAN_PREFIX="/usr/local/"
DARSHAN_TARBALL="darshan-$DARSHAN_VER.tar.gz"
DARSHAN_URL="https://web.cels.anl.gov/projects/darshan/releases/$DARSHAN_TARBALL"

# print an error message to stderr and stop the script
die() {
    echo "[ERROR] $*" >&2
    exit 1
}

# `command -v` is a shell builtin, no external `which` tool is required
if command -v darshan-config > /dev/null 2>&1; then
    echo "Darshan installed"
    exit 0
fi

SCRIPTS_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}"; )/" &> /dev/null \
                && pwd 2> /dev/null; )"

# patch EnTK with Darshan related fixes (Darshan itself is installed even if
# the patching fails, but the failure is reported)
"$SCRIPTS_DIR/entk-patched.sh" \
    || echo "[WARNING] EnTK was not patched with Darshan related fixes" >&2

# build dependencies (best effort)
(sudo apt-get update -y && \
 sudo apt-get install -y \
     autoconf build-essential cmake libtool tar) \
    &> /dev/null || true

cd "$HOME" || die "cannot change directory to \"$HOME\""
wget -q "$DARSHAN_URL" || die "cannot download \"$DARSHAN_URL\""
tar -xvzf "$DARSHAN_TARBALL" || die "cannot extract \"$DARSHAN_TARBALL\""

# --- darshan-runtime (the log directory is taken from the env variable
#     DARSHAN_LOG_DIR_PATH)
cd "$HOME/darshan-$DARSHAN_VER/darshan-runtime/" \
    || die "darshan-runtime sources not found"
../prepare.sh
MAKE=gmake ./configure \
    --prefix="$DARSHAN_PREFIX" \
    --with-log-path-by-env=DARSHAN_LOG_DIR_PATH \
    --with-jobid-env=NONE --without-mpi CC=gcc \
    || die "darshan-runtime configuration failed"
(sudo make && sudo make install) || die "darshan-runtime build failed"

# --- darshan-util
cd "$HOME/darshan-$DARSHAN_VER/darshan-util" \
    || die "darshan-util sources not found"
../prepare.sh
./configure --prefix="$DARSHAN_PREFIX" \
    || die "darshan-util configuration failed"
(sudo make && sudo make install) || die "darshan-util build failed"
|
13 changes: 13 additions & 0 deletions
13
src/seavea-hackathon-2025/drafts/entk-darshan/scripts/entk-patched.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
#
# Clone RADICAL-EnTK (branch "master") into "$HOME/entk", apply the Darshan
# related patch ("entk.patch", located next to this script) and install the
# patched package with pip.
#
# The script can be re-run: an existing clone is reused and an already applied
# patch is not applied again. It exits with a non-zero code if the patched
# sources cannot be prepared, so that `pip install .` is never executed
# outside of the EnTK repository.

SCRIPTS_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}"; )/" &> /dev/null \
                && pwd 2> /dev/null; )"

REPO_DIR="$HOME/entk"
PATCH_FILE="$SCRIPTS_DIR/entk.patch"

# `git clone` refuses to clone into an existing non-empty directory
if [ ! -d "$REPO_DIR/.git" ]; then
    git clone --single-branch --branch master \
        https://github.com/radical-cybertools/radical.entk.git "$REPO_DIR" \
        || { echo "[ERROR] cannot clone EnTK into \"$REPO_DIR\"" >&2; exit 1; }
fi

cd "$REPO_DIR" \
    || { echo "[ERROR] cannot change directory to \"$REPO_DIR\"" >&2; exit 1; }

# if the patch can be reverted cleanly then it was applied by a previous run
if git apply --reverse --check "$PATCH_FILE" &> /dev/null; then
    echo "EnTK patch already applied"
else
    git apply "$PATCH_FILE" \
        || { echo "[ERROR] cannot apply \"$PATCH_FILE\"" >&2; exit 1; }
fi

pip install .
|
34 changes: 34 additions & 0 deletions
34
src/seavea-hackathon-2025/drafts/entk-darshan/scripts/entk.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
diff --git a/src/radical/entk/tools/darshan.py b/src/radical/entk/tools/darshan.py | ||
index c64a005a..f47ddc70 100644 | ||
--- a/src/radical/entk/tools/darshan.py | ||
+++ b/src/radical/entk/tools/darshan.py | ||
@@ -103,7 +103,7 @@ def cache_darshan_env(darshan_runtime_root: Optional[str] = None, | ||
if out is not None: | ||
out = out.strip() | ||
|
||
- if ret or not out: | ||
+ if ret or not out or 'DARSHAN_LOG_DIR_PATH' in out: | ||
print(f'[WARNING] Darshan log path not set: "{err}"') | ||
_darshan_log_path = '%(sandbox)s/darshan_logs' | ||
|
||
@@ -245,8 +245,9 @@ def get_parsed_data(log: str, target_counters: Union[str, List[str]]) -> set: | ||
return data | ||
|
||
grep_patterns = '-e ' + ' -e '.join(ru.as_list(target_counters)) | ||
- parser_cmd = (f'darshan-parser {log} | grep {grep_patterns} | ' | ||
- "awk '{print $5\":\"$6}'") | ||
+ parser_cmd = (f'env LD_PRELOAD="{_darshan_runtime_root}' | ||
+ f'/lib/libdarshan-util.so" darshan-parser {log} | ' | ||
+ f'grep {grep_patterns} | ' + "awk '{print $5\":\"$6}'") | ||
out, err, ret = ru.sh_callout(parser_cmd, env=_darshan_env, shell=True) | ||
if ret: | ||
print(f'[ERROR] Darshan not able to parse "{log}": {err}') | ||
@@ -282,7 +283,7 @@ def annotate_task_with_darshan(task: Task) -> None: | ||
|
||
log_files = [] | ||
if '%(sandbox)s' in _darshan_log_path: | ||
- for log in glob.glob(_darshan_log_path % {'sandbox': task.path}): | ||
+ for log in glob.glob(_darshan_log_path % {'sandbox': task.path} + '/*'): | ||
log_files.append(log) | ||
else: | ||
if _start_datetime is None: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
dependencies: | ||
- openmpi | ||
- pip | ||
- setuptools | ||
- wheel | ||
- pip: | ||
- git+https://github.com/radical-cybertools/radical.utils.git@devel | ||
- git+https://github.com/radical-cybertools/radical.gtod.git@devel | ||
- git+https://github.com/radical-cybertools/radical.pilot.git@devel | ||
- git+https://github.com/radical-cybertools/radical.entk.git@devel | ||
- git+https://github.com/radical-cybertools/radical.analytics.git@devel | ||
|
Binary file added
BIN
+103 KB
src/seavea-hackathon-2025/radical-entk-hackathon/images/entk-pst-model.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.