From f08b3d1398fcb1a4dcd9294260a16e904e2f7e38 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 15 Feb 2025 10:36:06 -0600 Subject: [PATCH 1/2] Add missing tick to the domain query --- docs/07-iowarp/02-runtime/02-code-example.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/07-iowarp/02-runtime/02-code-example.md b/docs/07-iowarp/02-runtime/02-code-example.md index f4918bbf..89c729e5 100644 --- a/docs/07-iowarp/02-runtime/02-code-example.md +++ b/docs/07-iowarp/02-runtime/02-code-example.md @@ -103,7 +103,7 @@ client.Create( ``` ``Create`` will create a ChiPool. This pool will span all nodes -(`chi::DomainQuery::GetGlobalBcast()``) and will +(``chi::DomainQuery::GetGlobalBcast()``) and will be registered first by Chimaera Admin's first container (``chi::DomainQuery::GetDirectHash(chi::SubDomainId::kGlobalContainers, 0)``). By default, there will be one container per node in the provided domain. From 37a240b1af6778c8092d9f5691a71294f5ac2d1a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 17 Feb 2025 08:17:13 -0600 Subject: [PATCH 2/2] Add ppi doc and change to iowarp repo --- docs/07-iowarp/01-index.md | 6 +- docs/07-iowarp/01-shared-memory/01-index.md | 24 +- .../01-jarvis-util/01-index.md | 3 + .../01-jarvis-util/02-program-execution.md | 157 ++++ .../01-jarvis-util/03-builtin-wrappers.md | 33 + .../01-jarvis-util/04-argument-parsing.md | 700 ++++++++++++++++++ .../01-jarvis-util/05-hostfile.md | 77 ++ .../02-jarvis-cd/01-index.md | 156 ++++ .../02-jarvis-cd/02-getting-started.md | 227 ++++++ .../02-jarvis-cd/03-resource-graph.md | 115 +++ .../02-jarvis-cd/04-design-motivation.md | 117 +++ .../02-jarvis-cd/05-pipeline-scripts.md | 80 ++ .../02-jarvis-cd/06-pipeline-tests.md | 130 ++++ .../02-jarvis-cd/07-pipeline-indexes.md | 136 ++++ .../02-jarvis-cd/08-custom-repos.md | 78 ++ .../02-jarvis-cd/09-building-package.md | 450 +++++++++++ .../02-jarvis-cd/10-python-api.md | 128 ++++ .../02-jarvis-cd/11-schedulers.md | 46 ++ .../02-jarvis-cd/12-packages/_category_.yml | 1 + .../02-jarvis-cd/12-packages/cm1.md | 70 ++ .../02-jarvis-cd/12-packages/deepdrivemd.md | 326 ++++++++ .../02-jarvis-cd/12-packages/gadget2.md | 47 ++ .../02-jarvis-cd/12-packages/gadget4.md | 27 + .../02-jarvis-cd/12-packages/lammps.md | 122 +++ .../02-jarvis-cd/12-packages/nyx.md | 121 +++ .../02-jarvis-cd/12-packages/openfoam.md | 68 ++ .../02-jarvis-cd/12-packages/vpic-kokkos.md | 47 ++ .../02-jarvis-cd/12-packages/vpic.md | 35 + .../02-jarvis-cd/12-packages/wrf.md | 473 ++++++++++++ .../02-jarvis-cd/20-future-work.md | 8 + .../02-jarvis-cd/images/pipeline.svg | 1 + .../_category_.yml | 1 + 32 files changed, 3998 insertions(+), 12 deletions(-) create mode 100644 docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/01-index.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/02-program-execution.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/03-builtin-wrappers.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/04-argument-parsing.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/05-hostfile.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/01-index.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/02-getting-started.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/03-resource-graph.md create mode 100644 
docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/04-design-motivation.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/05-pipeline-scripts.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/06-pipeline-tests.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/07-pipeline-indexes.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/08-custom-repos.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/09-building-package.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/10-python-api.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/11-schedulers.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/_category_.yml create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/cm1.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/deepdrivemd.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget2.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget4.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/lammps.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/nyx.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/openfoam.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic-kokkos.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/wrf.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/20-future-work.md create mode 100644 docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/images/pipeline.svg create mode 100644 docs/07-iowarp/03-platform-plugins-interface/_category_.yml diff --git a/docs/07-iowarp/01-index.md b/docs/07-iowarp/01-index.md index 4b186d2c..4d8c501f 100644 --- a/docs/07-iowarp/01-index.md +++ b/docs/07-iowarp/01-index.md @@ -18,11 +18,11 @@ echo ". 
${PWD}/share/spack/setup-env.sh" >> ~/.bashrc
source ~/.bashrc
```

-### Clone the GRC Spack Repo
+### Clone the IoWarp Spack Repo
```bash
cd ${HOME}
-git clone https://github.com/grc-iit/grc-repo
-spack repo add grc-repo
+git clone https://github.com/iowarp/iowarp-install.git
+spack repo add iowarp-install/iowarp-spack
```

## Install IOWARP: USERS
diff --git a/docs/07-iowarp/01-shared-memory/01-index.md b/docs/07-iowarp/01-shared-memory/01-index.md
index 57f3c1fd..a3e55bb0 100644
--- a/docs/07-iowarp/01-shared-memory/01-index.md
+++ b/docs/07-iowarp/01-shared-memory/01-index.md
@@ -4,23 +4,29 @@ This library contains a variety of data structures and synchronization primitive
[![Coverage Status](https://coveralls.io/repos/github/lukemartinlogan/hermes_shm/badge.svg?branch=master)](https://coveralls.io/github/lukemartinlogan/hermes_shm?branch=master)

-## Installation: Users
+## Spack Install
+
+### Clone the IoWarp Spack Repo
+```bash
+cd ${HOME}
+git clone https://github.com/iowarp/iowarp-install.git
+spack repo add iowarp-install/iowarp-spack
+```
+
+### Installation: Users
For those installing this component (rather than all of iowarp):
```bash
-git clone https://github.com/grc-iit/grc-repo.git
-spack repo add grc-repo
-spack install hermes_shm
+spack install cte-hermes-shm
+spack load cte-hermes-shm
```

-## Installation: Devs
+### Installation: Devs
This will install dependencies of hermes-shm:
```bash
-git clone https://github.com/grc-iit/grc-repo.git
-spack repo add grc-repo
-spack install hermes_shm +nocompile
-spack load hermes_shm +nocompile
+spack install cte-hermes-shm +nocompile
+spack load cte-hermes-shm
```
NOTE: spack load needs to be done for each new terminal.
diff --git a/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/01-index.md b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/01-index.md
new file mode 100644
index 00000000..f6992683
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/01-index.md
@@ -0,0 +1,3 @@
+# Introduction
+
+`jarvis-util` contains various helpers for executing external programs from within Python. We provide various wrappers to support parallel execution (MPI, PSSH), remote execution (SSH), and local execution (exec/fork). We also provide functions which wrap around various helpful shell commands, such as filesystem and process commands (ls, rm, mkdir, pkill).
diff --git a/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/02-program-execution.md b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/02-program-execution.md
new file mode 100644
index 00000000..f5002305
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/02-program-execution.md
@@ -0,0 +1,157 @@
+# Program Execution
+
+```python
+from jarvis_util.shell.exec import Exec
+```
+
+`Exec` is used to execute a binary program as a subprocess in Python. `Exec` can be used for local, remote, or parallel execution of code. Exec is currently a wrapper around the following libraries:
+
+1. Subprocess: executes a program locally on a machine. We use shell=True here. The intention is to be equivalent to a bash script.
+2. SSH: executes a program remotely using SSH. This has only been tested on Linux. It is equivalent to executing "ssh" in the terminal.
+3. Parallel SSH (PSSH): executes a program on multiple remote hosts. Relies upon the SSH module.
+4. Message Passing Interface (MPI): executes a program in parallel using MPI. Only tested over MPICH at this time.
+
+`Exec` has a simple syntax. 
It takes as input a command (cmd) and how the command should be executed (`exec_info`). For example, `exec_info` can be used to represent executing the command in parallel using MPI or locally on a machine using subprocess.
+
+```python
+from jarvis_util.shell.exec import Exec
+Exec(cmd, exec_info)
+```
+
+Exec can be called by specifying only "cmd". In this case, the command will be executed locally. Its output will be printed to the terminal.
+
+```python
+from jarvis_util.shell.exec import Exec
+Exec(cmd)
+```
+
+## `ExecInfo`
+
+`ExecInfo` stores all information which may be needed to execute a command with a particular protocol. This includes information such as the location of private/public keys, hostfiles, and environment variables. `ExecInfo` also includes parameters for collecting output from commands.
+
+```python
+ExecInfo(exec_type=ExecType.LOCAL, nprocs=None, ppn=None,
+         user=None, pkey=None, port=None, hostfile=None, env=None,
+         sleep_ms=0, sudo=False, cwd=None, hosts=None,
+         collect_output=None, pipe_stdout=None, pipe_stderr=None,
+         hide_output=None, exec_async=False, stdin=None)
+```
+
+### Specifying execution method (e.g., SSH vs MPI)
+
+There are many ways to execute a command: Subprocess, SSH, etc. To specify this, there is an enum with all currently supported methods. The supported methods are:
+
+1. `ExecType.LOCAL`
+2. `ExecType.SSH`
+3. `ExecType.PSSH`
+4. `ExecType.MPI`
+
+Setting `exec_type` will spawn the command using the particular approach. By default, `exec_type` is `ExecType.LOCAL`.
+
+### Managing output from commands
+
+ExecInfo has four parameters for managing output from commands:
+
+1. `collect_output`: Whether to store the output from the command in a buffer in Python. Will impact memory utilization if the command has large output. This is `False` by default.
+2. `pipe_stdout`: Store stdout in a file. By default, this is `None`.
+3. `pipe_stderr`: Store stderr in a file. By default, this is `None`.
+4. `hide_output`: Don't print output.
+
+Unlike a typical subprocess call, you can perform any combination of these. Output can be collected at the same time it's being printed. This is particularly useful if you have a long-running process you want to collect output from AND ensure is still progressing. This is accomplished by spawning two threads: one for collecting stderr, and another for collecting stdout.
+
+### Asynchronous execution
+
+ExecInfo also makes it possible to execute a command asynchronously. This is particularly useful for running a daemon. For example, deploying a storage system requires the storage system to run as a service. This can cause the program to block forever unless asynchronous execution is enabled. Async execution is specified by setting `exec_async=True`.
+
+## `LocalExec`
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.local_exec import LocalExecInfo
+```
+
+The simplest way to execute a program locally is as follows:
+
+```python
+from jarvis_util.shell.exec import Exec
+node = Exec('echo hello')
+```
+
+This will print "hello" to the console.
+
+However, if more control is needed, a `LocalExecInfo` contains many helpful parameters.
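+
+For instance, assuming `LocalExecInfo` accepts the same `env` and `cwd` parameters as the `ExecInfo` constructor shown above (an assumption based on that constructor, not a verified API guarantee), a working directory and extra environment variables might be passed like this:
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.local_exec import LocalExecInfo
+
+# Hypothetical sketch: run a command from /tmp with an extra
+# environment variable, assuming LocalExecInfo forwards the env and
+# cwd parameters of ExecInfo
+node = Exec('echo $MY_VAR', LocalExecInfo(cwd='/tmp',
+                                          env={'MY_VAR': 'hello'}))
+```
+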
+The following demonstrates various examples of output handling:
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.local_exec import LocalExecInfo
+
+# Will ONLY print to the terminal
+node = Exec('echo hello', LocalExecInfo(collect_output=False))
+# Will collect AND print to the terminal
+node = Exec('echo hello', LocalExecInfo(collect_output=True))
+# Will collect BUT NOT print to the terminal
+node = Exec('echo hello', LocalExecInfo(collect_output=True,
+                                        hide_output=True))
+# Will collect, pipe to file, and print to terminal
+node = Exec('echo hello', LocalExecInfo(collect_output=True,
+                                        pipe_stdout='/tmp/stdout.txt',
+                                        pipe_stderr='/tmp/stderr.txt'))
+```
+
+To execute a program asynchronously, one can do:
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.local_exec import LocalExecInfo
+
+node = Exec('echo hello', LocalExecInfo(exec_async=True))
+node.wait()
+```
+
+## `SshExec`
+
+The following code will execute the "hostname" command on the local host using SSH.
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.ssh_exec import SshExecInfo
+
+node = Exec('hostname', SshExecInfo(hosts='localhost'))
+```
+
+## `PsshExec`
+
+The following code will execute the "hostname" command on all machines in the hostfile.
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.pssh_exec import PsshExecInfo
+
+node = Exec('hostname', PsshExecInfo(hostfile="/tmp/hostfile.txt"))
+```
+
+## `MpiExec`
+
+The following code will execute the "hostname" command on the local machine 24 times using MPI.
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.mpi_exec import MpiExecInfo
+
+node = Exec('hostname', MpiExecInfo(hostfile=None,
+                                    nprocs=24,
+                                    ppn=None))
+```
+
+The following code will execute the "hostname" command on 4 nodes (specified in hostfile) using MPI.
+"ppn" stands for processes per node.
+
+```python
+from jarvis_util.shell.exec import Exec
+from jarvis_util.shell.mpi_exec import MpiExecInfo
+
+node = Exec('hostname', MpiExecInfo(hostfile="/tmp/hostfile.txt",
+                                    nprocs=4,
+                                    ppn=1))
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/03-builtin-wrappers.md b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/03-builtin-wrappers.md
new file mode 100644
index 00000000..f8aa0d5d
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/03-builtin-wrappers.md
@@ -0,0 +1,33 @@
+# Built-in Wrappers
+
+We have various wrappers to support much of the shell's functionality. At this time, these have been built and tested for Linux. These codes inherit from the `Exec` class shown in the Program Execution section. This way, they can be executed locally or in parallel.
+
+## Creating + Deleting Directories
+
+We provide various wrappers for filesystem commands.
+
+```python
+from jarvis_util.shell.filesystem import Mkdir
+from jarvis_util.shell.filesystem import Rm
+
+# Creates two directories "path1" and "path2"
+Mkdir(['path1', 'path2'])
+# Creates a single directory path3
+Mkdir('path3')
+
+# Remove two directories (including subdirectories + files)
+Rm(['path1', 'path2'])
+# Remove a single directory
+Rm('path3')
+```
+
+## Killing Processes
+
+We provide a wrapper for pkill, which can kill processes in parallel.
+
+```python
+from jarvis_util.shell.process import Kill
+
+# Kill all processes whose name matches the pattern 'hermes'
+Kill('hermes')
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/04-argument-parsing.md b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/04-argument-parsing.md
new file mode 100644
index 00000000..b84ec56a
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/04-argument-parsing.md
@@ -0,0 +1,700 @@
+# Argument Parsing
+
+We provide a custom argument parsing implementation. The main difference between
+this argparser and others is that we support the concept of "menus".
+Jarvis calls sub-modules, which each have their own specific interfaces.
+There are sub-modules for modifying the resource graph, initializing jarvis,
+creating pipelines, etc. These modules each have different parameter spaces.
+
+## Defining Arguments
+
+The `ArgParse` class has an abstract method called define_options. Create
+a class inheriting from `ArgParse` and implement define_options to make the menu.
+
+We have the example [basic_argparse.py](https://github.com/grc-iit/jarvis-util/blob/master/example/basic_argparse.py) below:
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu()
+        self.add_args([
+            {
+                'name': 'hello',
+                'msg': 'A message to print',
+                'type': str,  # The type of this variable
+                'required': True,  # This argument is required
+                'pos': True,  # This is a positional argument
+            },
+            {
+                'name': 'hello_optional',
+                'msg': 'An optional message to print',
+                'type': str,  # The type of the variable to produce
+                'default': 'no optional message given',
+                'required': False,  # This argument is not required
+                'pos': True,  # This is a positional argument
+            },
+            {
+                'name': 'hello_kwarg',
+                'msg': 'An integer keyword argument to print',
+                'type': int,  # The type of the variable
+                'default': 0,
+            },
+        ])
+
+    # When add_menu has no parameters, process_args will call this function
+    def main_menu(self):
+        # Parsed parameters are placed in self.kwargs
+        print(self.kwargs['hello'])
+        print(self.kwargs['hello_optional'])
+        print(self.kwargs['hello_kwarg'])
+        print(self.real_kwargs)
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+### Required Positional Arguments
+
+The first argument we defined was "hello", which is a required parameter.
+
+```python
+{
+    'name': 'hello',
+    'msg': 'A message to print',
+    'type': str,  # The type of this variable
+    'required': True,  # This argument is required
+    'pos': True,  # This is a positional argument
+}
+```
+
+This is the only required parameter in this case. We can run the program
+above with only this one parameter.
+
+```bash
+python3 example/basic_argparse.py 'my required msg'
+```
+
+Output:
+
+```
+my required msg
+no optional message given
+0
+{'hello_kwarg': 0, 'hello': 'my required msg', 'hello_optional': 'no optional message given'}
+{'hello': 'my required msg'}
+```
+
+### Optional Positional Arguments
+
+The second argument we defined was "hello_optional", which is optional.
+
+```python
+{
+    'name': 'hello_optional',
+    'msg': 'An optional message to print',
+    'type': str,  # The type of the variable to produce
+    'default': 'no optional message given',
+    'required': False,  # This argument is not required
+    'pos': True,  # This is a positional argument
+}
+```
+
+To input the optional positional parameter, run the command below:
+
+```bash
+python3 example/basic_argparse.py 'my required msg' 'my optional message'
+```
+
+Output:
+
+```
+my required msg
+my optional message
+0
+{'hello_kwarg': 0, 'hello': 'my required msg', 'hello_optional': 'my optional message'}
+{'hello': 'my required msg', 'hello_optional': 'my optional message'}
+```
+
+### `keyword` Arguments
+
+The third argument we defined was "hello_kwarg". Keyword arguments are
+always optional. The default value, if not specified, will be None.
+
+```python
+{
+    'name': 'hello_kwarg',
+    'msg': 'An integer keyword argument to print',
+    'type': int,  # The type of the variable
+    'default': 0,
+}
+```
+
+The following commands are all correct and mean the same thing. You can
+use -- or - in front of keyword arguments; both mean the same thing and
+are supported for legacy reasons.
+
+```bash
+python3 example/basic_argparse.py 'my required msg' hello_kwarg=124
+python3 example/basic_argparse.py 'my required msg' --hello_kwarg=124
+python3 example/basic_argparse.py 'my required msg' -hello_kwarg=124
+```
+
+In each case, the output is:
+
+```
+my required msg
+no optional message given
+124
+{'hello_kwarg': 124, 'hello': 'my required msg', 'hello_optional': 'no optional message given'}
+{'hello': 'my required msg', 'hello_kwarg': 124}
+```
+
+### Detecting Explicitly Set Parameters
+
+Sometimes, it's good to know what parameters the user set explicitly, without filling in default values for everything. self.kwargs stores the entire parameter scope with default values filled in, whereas self.real_kwargs stores the values passed in specifically by the user.
+
+```
+python3 example/basic_argparse.py 'my required msg' hello_kwarg=124
+```
+
+Output:
+
+```
+my required msg
+no optional message given
+124
+{'hello_kwarg': 124, 'hello': 'my required msg', 'hello_optional': 'no optional message given'}
+{'hello': 'my required msg', 'hello_kwarg': 124}
+```
+
+Notice that self.real_kwargs (last line) does not have 'hello_optional', since it was not passed explicitly by the user.
+
+### Help
+
+You can print the help message by using the "h" or "help" keyword arguments.
+These are provided automatically and should not be manually defined.
+
+```bash
+python3 example/basic_argparse.py h
+python3 example/basic_argparse.py -h
+python3 example/basic_argparse.py --help
+python3 example/basic_argparse.py -help
+python3 example/basic_argparse.py help
+```
+
+In each case, the output is:
+
+```bash
+USAGE: basic_argparse.py [hello] [hello_optional (opt)] ...
+
+Name           Default                   Type   Description
+-------------- ------------------------- ------ -------------------------------------
+hello                                    str    A message to print
+hello_optional no optional message given str    An optional message to print
+hello_kwarg    0                         int    An integer keyword argument to print
+help           False                     bool   Print help menu
+h              False                     bool   Print help menu
+```
+
+## Menus
+
+Let's say we're building an application launcher. Each application has its
+own parameter space. We have two applications:
+
+1. VPIC: A particle simulator code
+2. BD-CATS: A particle clustering code
+
+VPIC only has one operation: to generate the particle data.
+
+BD-CATS has two operations: cluster the particle data and then visualize
+the clustering by rendering an image at a certain resolution.
+
+Below is the code from [example/menu_argparse.py](https://github.com/grc-iit/jarvis-util/blob/master/example/menu_argparse.py):
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu('vpic')
+        self.add_args([
+            {
+                'name': 'steps',
+                'msg': 'Number of execution steps',
+                'type': int,  # The type of this variable
+                'required': True,  # This argument is required
+                'pos': True,  # This is a positional argument
+            }
+        ])
+
+        self.add_menu('bd-cats run')
+        self.add_args([
+            {
+                'name': 'path',
+                'msg': 'Path to particle data',
+                'type': str,  # The type of this variable
+                'required': True,  # This argument is required
+                'pos': True,  # This is a positional argument
+            }
+        ])
+
+        self.add_menu('bd-cats draw')
+        self.add_args([
+            {
+                'name': 'resolution',
+                'msg': 'Dimensions of the image to create',
+                'type': str,  # The type of this variable
+                'required': True,  # This argument is required
+                'pos': True,  # This is a positional argument
+            }
+        ])
+
+    def vpic(self):
+        print(f'Starting VPIC with {self.kwargs["steps"]} steps')
+
+    def bd_cats_run(self):
+        print(f'Starting BD-CATS with {self.kwargs["path"]}')
+
+    def bd_cats_draw(self):
+        print(f'Drawing BD-CATS output at {self.kwargs["resolution"]}')
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+process_args will execute the function corresponding to the menu name.
+In this case, the menu names are:
+
+1. vpic
+2. bd-cats run
+3. bd-cats draw
+
+### VPIC Menu
+
+The following code defines the VPIC menu:
+
+```python
+self.add_menu('vpic')
+self.add_args([
+    {
+        'name': 'steps',
+        'msg': 'Number of execution steps',
+        'type': int,  # The type of this variable
+        'required': True,  # This argument is required
+        'pos': True,  # This is a positional argument
+    }
+])
+```
+
+To execute the VPIC menu:
+
+```
+python3 example/menu_argparse.py vpic 24
+```
+
+Output:
+
+```bash
+Starting VPIC with 24 steps
+```
+
+### BD-CATS Run Menu
+
+```python
+self.add_menu('bd-cats run')
+self.add_args([
+    {
+        'name': 'path',
+        'msg': 'Path to particle data',
+        'type': str,  # The type of this variable
+        'required': True,  # This argument is required
+        'pos': True,  # This is a positional argument
+    }
+])
+```
+
+process_args will search for the method name corresponding to 'bd-cats run'.
+The corresponding method name replaces all spaces with '\_' and all '-' with
+'\_'. In this case, it will search for bd_cats_run.
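+
+The mangling rule is simple enough to sketch (this is an illustration of the rule just described, not the actual jarvis-util implementation):
+
+```python
+def menu_to_method_name(menu_name):
+    # Replace spaces and dashes with underscores,
+    # e.g. 'bd-cats run' -> 'bd_cats_run'
+    return menu_name.replace(' ', '_').replace('-', '_')
+
+assert menu_to_method_name('bd-cats run') == 'bd_cats_run'
+```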
+
+To execute the BD-CATS run menu:
+
+```
+python3 example/menu_argparse.py bd-cats run /tmp/output.bin
+```
+
+Output:
+
+```bash
+Starting BD-CATS with /tmp/output.bin
+```
+
+### BD-CATS Draw Menu
+
+```python
+self.add_menu('bd-cats draw')
+self.add_args([
+    {
+        'name': 'resolution',
+        'msg': 'Dimensions of the image to create',
+        'type': str,  # The type of this variable
+        'required': True,  # This argument is required
+        'pos': True,  # This is a positional argument
+    }
+])
+```
+
+process_args will search for the method name corresponding to 'bd-cats draw'.
+The corresponding method name replaces all spaces with '\_' and all '-' with
+'\_'. In this case, it will search for bd_cats_draw.
+
+To execute the BD-CATS draw menu:
+
+```
+python3 example/menu_argparse.py bd-cats draw 4096x4096
+```
+
+Output:
+
+```bash
+Drawing BD-CATS output at 4096x4096
+```
+
+## Argument Types
+
+We currently support five main types of arguments:
+
+1. Strings
+2. Integers
+3. Floats
+4. Booleans
+5. Lists of the above types
+
+Of these, booleans and lists are somewhat special.
+
+### Boolean Arguments
+
+Booleans are special in the sense that they have a special command line
+syntax when used as keyword arguments (as opposed to positional).
+
+Let's say we are at a restaurant and are ordering pasta. You have
+two options: with cheese and without cheese.
+
+Below is the code from [example/boolean_spaghetti.py](https://github.com/grc-iit/jarvis-util/blob/master/example/boolean_spaghetti.py).
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu('spaghetti')
+        self.add_args([
+            {
+                'name': 'cheese',
+                'msg': 'Whether to use cheese',
+                'type': bool,  # The type of this variable
+                'default': True
+            }
+        ])
+
+    def spaghetti(self):
+        if self.kwargs['cheese']:
+            print('I will take the spaghetti with cheese')
+        else:
+            print('I want actual Italian, and will not take your cheese')
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+#### `True`
+
+There are two ways to indicate truth:
+
+```bash
+python3 example/boolean_spaghetti.py spaghetti --cheese=true
+python3 example/boolean_spaghetti.py spaghetti +cheese
+```
+
+Output:
+
+```bash
+I will take the spaghetti with cheese
+```
+
+#### `False`
+
+There are two ways to indicate false:
+
+```bash
+python3 example/boolean_spaghetti.py spaghetti --cheese=false
+python3 example/boolean_spaghetti.py spaghetti -cheese
+```
+
+Output:
+
+```bash
+I want actual Italian, and will not take your cheese
+```
+
+### List Arguments
+
+There are many cases where a list of values is needed.
+One case that comes up is deciding which nodes in a distributed system
+to execute software on. For this case, we use a list of strings.
+
+Below we have an example of running the VPIC application on a set
+of machines in parallel.
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu('vpic run',
+                      keep_remainder=False)
+        self.add_args([
+            {
+                'name': 'hosts',
+                'msg': 'A list of hosts',
+                'type': list,
+                'args': [
+                    {
+                        'name': 'host',
+                        'msg': 'A string representing a host',
+                        'type': str,
+                    }
+                ]
+            }
+        ])
+
+    def vpic_run(self):
+        print(self.kwargs['hosts'])
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+To pass a list of strings, run the following:
+
+```bash
+python3 example/hostfile_test.py vpic run --hosts="[127.0.0.1, 10.0.0.1]"
+```
+
+Output:
+
+```bash
+['127.0.0.1', '10.0.0.1']
+```
+
+### Nested List Arguments
+
+Sometimes, you may want to have a list of lists. We use YAML syntax
+to parse these.
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu('vpic run',
+                      keep_remainder=False)
+        self.add_args([
+            {
+                'name': 'hosts',
+                'msg': 'A list of hosts and threads per-host',
+                'type': list,
+                'args': [
+                    {
+                        'name': 'host',
+                        'msg': 'Host name',
+                        'type': str,
+                    },
+                    {
+                        'name': 'count',
+                        'msg': 'The number of devices to search for',
+                        'type': int,
+                    }
+                ]
+            }
+        ])
+
+    # Print the parsed list of [host, count] pairs
+    def vpic_run(self):
+        print(self.kwargs['hosts'])
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+To pass a list of strings and ints, run the following:
+
+```bash
+python3 example/hostfile_threads_test.py vpic run --hosts="[[127.0.0.1, 4], [10.0.0.1, 4]]"
+```
+
+Output:
+
+```bash
+[['127.0.0.1', 4], ['10.0.0.1', 4]]
+```
+
+## Tracking Remaining Arguments
+
+Sometimes, you don't want your argument parser to error if a parameter is
+unrecognized. You may want to implement a special syntax for parsing the
+remaining parameters.
+
+In this case, menus provide the "keep_remainder" and "remainder_as_kv" options.
+
+### List Remainder
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu(keep_remainder=True)
+        self.add_args([
+            {
+                'name': 'hi',
+                'msg': 'hello',
+                'type': str,
+                'default': None
+            }
+        ])
+
+    def main_menu(self):
+        print(self.kwargs['hi'])
+        print(self.remainder)
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+This will create a keyword argument named "hi", which takes as input a string,
+and keeps the remaining arguments in a list named "self.remainder".
+
+Run the following command:
+
+```bash
+python3 example/remainder.py --hi=hi 1 2 3 4 5
+```
+
+Output:
+
+```bash
+hi
+['1', '2', '3', '4', '5']
+```
+
+### Key-Value Remainder
+
+Sometimes the remainder should be stored as a dict instead of
+a list.
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu(keep_remainder=True,
+                      remainder_as_kv=True)
+        self.add_args([
+            {
+                'name': 'hi',
+                'msg': 'hello',
+                'type': str,
+                'default': None
+            }
+        ])
+
+    def main_menu(self):
+        print(self.kwargs['hi'])
+        print(self.remainder_kv)
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+This will create a keyword argument named "hi", which takes as input a string, and keeps the remaining arguments in a dict named "self.remainder_kv".
+
+Run the following command:
+
+```bash
+python3 example/remainder_kv.py --hi=hi VAR1=25 VAR2=26
+```
+
+```bash
+hi
+{'VAR1': '25', 'VAR2': '26'}
+```
+
+## Choice Arguments
+
+Sometimes you have parameters which have a well-defined set of values.
+In this case, you can define 'choices'.
+
+```python
+from jarvis_util.util.argparse import ArgParse
+
+
+class MyArgParse(ArgParse):
+    def define_options(self):
+        self.add_menu()
+        self.add_args([
+            {
+                'name': 'hi',
+                'msg': 'hello',
+                'type': str,
+                'choices': ['a', 'b', 'c'],
+                'default': None
+            }
+        ])
+
+    def main_menu(self):
+        print(self.kwargs['hi'])
+
+
+args = MyArgParse()
+args.process_args()
+```
+
+Example of correct input:
+
+```bash
+python3 example/choices.py hi=a
+```
+
+```bash
+a
+```
+
+Example of incorrect input:
+
+```bash
+python3 example/choices.py hi=d
+```
+
+```bash
+In the menu , hi=d is not a valid choice
+USAGE: choices.py ...
+
+Name   Default   Type   Description
+------ --------- ------ ---------------
+hi               str    hello
+help   False     bool   Print help menu
+h      False     bool   Print help menu
+In the menu , hi was not of type
+USAGE: choices.py ...
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/05-hostfile.md b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/05-hostfile.md
new file mode 100644
index 00000000..1486d1d1
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/01-jarvis-util/05-hostfile.md
@@ -0,0 +1,77 @@
+# Hostfile
+
+Hostfiles contain a set of machines.
+
+## Host Text Files
+
+Hostfiles can be stored as text files on a filesystem.
+They have the following syntax:
+
+```
+ares-comp-01
+ares-comp-[02-04]
+ares-comp-[05-09,11,12-14]-40g
+```
+
+## Hostfile Import
+
+```python
+from jarvis_util.util.hostfile import Hostfile
+```
+
+## Hostfile Constructor
+
+The hostfile has the following constructor:
+
+```python
+class Hostfile:
+    """
+    Parse a hostfile or store a set of hosts passed in manually.
+    """
+
+    def __init__(self, hostfile=None, all_hosts=None, all_hosts_ip=None,
+                 text=None, find_ips=True):
+        """
+        Constructor. Parse hostfile or store existing host list.
+
+        :param hostfile: The path to the hostfile
+        :param all_hosts: a list of strings representing all hostnames
+        :param all_hosts_ip: a list of strings representing all host IPs
+        :param text: Text of a hostfile
+        :param find_ips: Whether to construct host_ip and all_host_ip fields
+        """
+```
+
+## Hostfile for the current machine
+
+To get a hostfile representing only the local machine:
+
+```python
+hostfile = Hostfile()
+```
+
+## Hostfile from a filesystem
+
+To load a hostfile from the filesystem:
+
+```python
+hostfile = Hostfile(hostfile=f'{HERE}/test_hostfile.txt')
+```
+
+## Host names and IPs
+
+To get the host names and IP addresses, the Hostfile stores the `hosts`
+and `hosts_ip` variables. They are lists of strings.
+
+```python
+hostfile = Hostfile()
+print(hostfile.hosts)
+print(hostfile.hosts_ip)
+```
+
+Output:
+
+```bash
+['localhost']
+['127.0.0.1']
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/01-index.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/01-index.md
new file mode 100644
index 00000000..a0415f4a
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/01-index.md
@@ -0,0 +1,156 @@
+# Jarvis-CD
+Jarvis-CD is a unified platform for deploying various applications, including
+storage systems and benchmarks. Many applications have complex configuration
+spaces and are difficult to deploy across different machines.
+
+We provide a builtin repo which contains various applications to deploy.
+We refer to applications as "jarvis pkgs", which can be connected to form
+"deployment pipelines".
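+
+As a quick preview, a pipeline that deploys the IOR benchmark (covered step-by-step in the Getting Started section) is assembled from commands like the following:
+
+```bash
+jarvis ppl create ior_test
+jarvis ppl append ior api=posix xfer=4k block=1m out=/tmp/ior.bin
+jarvis ppl run
+```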
+
+## Installation
+
+### Clone the IoWarp Spack Repo
+```bash
+cd ${HOME}
+git clone https://github.com/iowarp/iowarp-install.git
+spack repo add iowarp-install/iowarp-spack
+```
+
+
+### Install Jarvis
+```bash
+spack external find python
+spack install py-ppi-jarvis-cd
+```
+
+Spack packages must be loaded to use them.
+You'll have to do this for each new terminal.
+```bash
+spack load py-ppi-jarvis-cd
+```
+
+## Building the Jarvis Configuration
+
+### Bootstrapping for a single-node machine
+
+You may be trying to test things on just a single node.
+
+In this case, run:
+```bash
+jarvis bootstrap from local
+```
+
+### Bootstrapping from a specific machine
+
+Jarvis has been pre-configured on some machines. To bootstrap from
+one of them, run the following:
+
+```bash
+jarvis bootstrap from ares
+```
+
+NOTE: Jarvis must be installed from the compute nodes in Ares, NOT the master node. This is because we store configuration data in /mnt/ssd by default, which is only on compute nodes. We do not store data in /tmp since it will eventually be destroyed.
+
+To check the set of available machines to bootstrap from, run:
+```bash
+jarvis bootstrap list
+```
+
+### Creating a new configuration
+
+A configuration can be generated as follows:
+```bash
+jarvis init [CONFIG_DIR] [PRIVATE_DIR] [SHARED_DIR (optional)]
+```
+
+* **CONFIG_DIR:** A directory where jarvis metadata for pkgs and pipelines
+is stored. This directory can be anywhere that the current user can access.
+* **PRIVATE_DIR:** A directory which is common across all machines, but
+stores data locally to the machine. Some jarvis pkgs require certain data to
+be stored per-machine. OrangeFS is an example.
+* **SHARED_DIR:** A directory which is common across all machines, where
+each machine has the same view of data in the directory. Most jarvis pkgs
+require this, but on machines without a global filesystem (e.g., Chameleon Cloud),
+this parameter can be set later.
+
+For a personal machine, these directories can be the same directory.
+
+## Set the active Hostfile
+
+The hostfile contains the set of nodes that the pipeline will run over.
+This is structured the same way as a traditional MPI hostfile.
+
+An example hostfile:
+
+```txt
+ares-comp-20
+ares-comp-[21-25]
+```
+
+To set the active hostfile, run:
+
+```bash
+jarvis hostfile set /path/to/hostfile
+```
+
+Note that every time you change the hostfile, you will need to update the
+pipeline. Jarvis does not automatically detect changes to this file.
+
+```bash
+jarvis ppl update
+```
+
+## Building the Resource Graph
+
+NOTE: This step only needs to be run if you did ``jarvis bootstrap from local`` or ``jarvis init``.
+If you bootstrap from a specific machine, then skip this section.
+
+The resource graph is a snapshot of your system's network and storage.
+Many packages depend on it for their configurations. The Hermes I/O system, for example,
+uses this to identify valid networks and buffering locations.
+```bash
+jarvis rg build
+```
+
+## Manual Installation (Mainly Devs)
+
+### Jarvis-Util
+Jarvis-CD depends on jarvis-util. jarvis-util contains functions to execute
+binaries in Python and collect their output.
+
+```bash
+git clone https://github.com/grc-iit/jarvis-util.git
+cd jarvis-util
+python3 -m pip install -r requirements.txt
+python3 -m pip install -e .
+```
+
+### Scspkg
+
+Scspkg is a tool for building modulefiles using a CLI. It's not strictly
+necessary for Jarvis to function, but many of the readmes use it to provide
+structure to manual installations.
+
+```bash
+git clone https://github.com/grc-iit/scspkg.git
+cd scspkg
+python3 -m pip install -r requirements.txt
+python3 -m pip install -e .
+echo "module use \`scspkg module dir\`" >> ~/.bashrc
+```
+
+The wiki for scspkg is [here](https://github.com/grc-iit/scspkg.git).
+
+### Jarvis-CD
+
+```bash
+cd /path/to/jarvis-cd
+python3 -m pip install -r requirements.txt
+python3 -m pip install -e .
+```
+
+### Net Test
+
+Network test tool for identifying valid networks.
+```bash
+spack install chi-nettest
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/02-getting-started.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/02-getting-started.md
new file mode 100644
index 00000000..ba13c351
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/02-getting-started.md
@@ -0,0 +1,227 @@
+# Getting Started
+
+In this section, we will discuss the jarvis command line interface
+and the basic steps of creating a deployment
+pipeline. In this example, we will deploy only IOR, but more complex
+pipelines with more applications deployed at once can be made.
+
+## Setup
+
+```bash
+spack install ior
+```
+
+## Define the Job Hostfile
+
+Before jarvis can be used to deploy, the hostfile must be provided.
+
+```bash
+jarvis hostfile set [path-to-hostfile.txt]
+```
+
+This hostfile is used by all jarvis deployments.
+
+## Build Resource Graph
+
+NOTE: This step is not needed for machines which have pre-configured
+resource graphs. Skip this step when deploying in Ares. Do this step
+when deploying on your personal machine.
+
+First, we have to collect information about the system. The resource-graph
+utility command depends on fi_info and lsblk. If these are not
+provided by your machine, make sure they are loaded.
+
+To begin polling the system, run:
+
+```bash
+jarvis rg build
+```
+
+This information will be stored under `${JARVIS_ROOT}/config/resource_graph.yaml`. The command uses SSH to connect to all nodes. It will use the hostfile from the previous command and scan those nodes.
+
+## Create an empty pipeline
+
+The following command will create an empty pipeline "ior_test":
+
+```bash
+jarvis ppl create ior_test
+```
+
+After creating a pipeline, jarvis will focus on that pipeline.
+
+## Updating a pipeline
+
+The vast majority of problems encountered in Jarvis are because
+a pipeline did not get updated. One common mistake is that a hostfile
+changes, but the pipeline doesn't get updated. Pipelines initially
+build configuration files for each specific program in the pipeline.
+For example, a storage system would need to know the changed hosts
+and then rebuild its configuration based on those new systems.
+
+To update a pipeline, run:
+```bash
+jarvis ppl update
+```
+
+## Build Environment
+
+Next, we must make jarvis aware of all environment variables needed
+to execute applications in the pipeline. Jarvis automatically
+captures most relevant variables, including PATH, LD_LIBRARY_PATH,
+etc. This allows Jarvis to forward environment variables through SSH.
+
+```bash
+jarvis ppl env build
+```
+
+Alternatively, if you have an environment you would like to use
+across pipelines, you can create a named environment as follows:
+
+```bash
+jarvis env build my_env_name
+```
+
+This named environment can then be copied to a pipeline as follows:
+```bash
+jarvis ppl env copy my_env_name
+```
+
+NOTE: this is machine-dependent. You would have to have these modules
+installed on your machine beforehand.
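+
+For example, for the IOR pipeline built in this section (assuming IOR was installed through spack, as shown in Setup), a typical sequence would be:
+
+```bash
+# Load the packages the pipeline needs, then capture the environment
+spack load ior
+jarvis ppl env build
+```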
+
+NOTE: LD_PRELOAD should NOT be one of the environment variables set here.
+Jarvis has specific "Interceptor" pkgs for handling LD_PRELOAD.
+
+## Add pkgs to the pipeline
+
+To add pkgs to the pipeline:
+
+```bash
+jarvis ppl append ior api=posix xfer=4k block=1m out=/tmp/ior.bin
+```
+
+## Start a pipeline
+
+To start the configured pipeline, do:
+
+```bash
+jarvis ppl run
+```
+
+## Get the status of the pipeline
+
+To check whether the pipeline is functioning, do:
+
+```bash
+jarvis ppl status
+```
+
+NOTE: This command is not always implemented.
+
+## Clean a pipeline
+
+Pipelines can create large amounts of data. For example, OrangeFS may contain
+data leftover from a benchmark. To destroy this data, run:
+
+```bash
+jarvis ppl clean
+```
+
+## Re-configuring a `pkg`
+
+There may be cases where you need to reconfigure a pkg in the pipeline.
+To do this, run configure:
+
+```bash
+jarvis pkg conf ior api=mpiio
+```
+
+## Unlinking and removing `pkg`s
+
+- Unlinking a `pkg` means Jarvis will remove the `pkg` from the pipeline,
+  without destroying the `pkg`'s metadata. This will allow it to be
+  re-appended in the future.
+- Removing a `pkg` means Jarvis will remove the `pkg` from the pipeline
+  AND destroy its metadata.
+
+```bash
+jarvis pkg unlink [pkg_id]
+jarvis pkg remove [pkg_id]
+```
+
+## Changing to a different pipeline
+
+To make jarvis start, end, etc. apply to a different pipeline,
+use jarvis cd:
+
+```bash
+jarvis cd [pipeline_name]
+```
+
+NOTE: Jarvis stores the current pipeline in a file. Only one
+pipeline can be active at a time for a user.
+
+## Destroying a pipeline
+
+To destroy a pipeline, its pkgs, and all metadata associated with it:
+
+```bash
+jarvis ppl destroy [pipeline_name (opt)]
+```
+
+If no pipeline_name is provided, the current pipeline will be destroyed.
+
+## Listing Existing Pipelines
+
+To list all existing pipelines, run:
+
+```bash
+jarvis ppl list
+```
+
+## Viewing the pkgs in a Pipeline
+
+```bash
+jarvis ppl print
+```
+
+## Viewing the contents of a Pipeline
+
+This will print the contents of the pipeline's root directory.
+
+To view the CONFIG_DIR of the pipeline:
+```bash
+ls $(jarvis path)
+```
+
+To view the PRIVATE_DIR of the pipeline:
+```bash
+ls $(jarvis path +private)
+```
+
+To view the SHARED_DIR of the pipeline:
+```bash
+ls $(jarvis path +shared)
+```
+
+## Clear a pipeline
+
+The following will remove all pkgs from a pipeline:
+```bash
+jarvis ppl reset
+```
+
+## Resetting Jarvis
+
+The following command will destroy the metadata for all pipelines in Jarvis.
+It will ask for confirmation to avoid accidents.
+
+```bash
+jarvis reset
+```
+
+## Other Examples
+
+Each Jarvis repo contains its own README, which goes over specific examples.
+For example, [gray_scott](https://github.com/scs-lab/jarvis-cd/blob/master/builtin/builtin/gray_scott/README.md)
+shows an example of deploying over Hermes.
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/03-resource-graph.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/03-resource-graph.md
new file mode 100644
index 00000000..e0e9d0a6
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/03-resource-graph.md
@@ -0,0 +1,115 @@
+# Resource Graph
+
+A resource graph contains a snapshot of the state of a cluster. This
+normalizes machine-specific information which is common across jarvis
+repos. Resource graphs provide query interfaces which avoid having to
+repeat the same paths all over the place. 
This section describes the
+contents of a resource graph and the API available to Jarvis repos.
+
+## Resource Graph Contents
+
+The resource graph contains information about the cluster hardware:
+
+1. Block devices
+2. Device partitions
+3. Filesystem mount points
+4. Capacities
+5. Network protocols
+
+NOTE: The resource-graph utility command depends on:
+
+- fi_info
+- chi-nettest
+- lsblk
+- df
+
+These are installed automatically with the spack package.
+
+## Building a Resource Graph Automatically
+
+Most of the information regarding resource graphs can be introspected.
+The main things that are not automatic:
+
+1. Hostfile: which machines are we introspecting?
+2. What is the per-user mount point of the storage devices?
+
+If you want jarvis to execute distributed programs, provide a hostfile.
+This is optional for local programs.
+
+```bash
+jarvis hostfile set /path/to/hostfile
+```
+
+Build the resource graph:
+
+```bash
+jarvis rg build
+```
+
+The resource graph will be stored under `~/.jarvis/resource_graph.yaml`.
+
+## Storage Graph
+
+The following command lists all block devices, their type, and their mount points:
+
+```bash
+lsblk -o NAME,SIZE,MODEL,TRAN,MOUNTPOINT
+```
+
+The following command lists all mounted filesystems and their capacities:
+
+```bash
+df -h
+```
+
+The following command lists all SPDK NVMe devices:
+
+```bash
+spdk_nvme list -c
+```
+
+NOTE: SPDK is not currently implemented.
+
+The storage resource graph parses the output of these commands automatically.
+
+## Network Graph
+
+To build the network graph, we collect the outputs from the following command:
+
+```bash
+fi_info
+```
+
+This will store the network information available per host.
+
+We use a utility named chi-nettest to identify properties of networks. Mainly,
+we use this program to detect whether we can send data over them.
+
+## CPU Graph
+
+CPU information can be helpful for determining information such as the number
+of threads to use for a metadata service.
+
+TODO.
+
+## Memory Graph
+
+Memory information can be useful for determining things like cache sizes.
+
+TODO.
+
+## Querying the Resource Graph
+
+The resource graph can be queried for network and storage info.
+
+```python
+from jarvis_util import *
+
+rg = ResourceGraph()
+rg.find_storage(shared=True, needs_root=False)
+```
+
+## Future Ideas
+
+- What if we have a smart NIC?
+- What if we have FPGAs and ASICs?
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/04-design-motivation.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/04-design-motivation.md
new file mode 100644
index 00000000..271696ef
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/04-design-motivation.md
@@ -0,0 +1,117 @@
+# Design & Motivation
+
+First, we will describe how `jarvis-cd` is organized and the files that it creates.
+
+## Jarvis Config
+
+The Jarvis configuration file is stored under `${JARVIS_ROOT}/config/jarvis_config.yaml`.
+It stores the `${CONFIG_DIR}`, `${PRIVATE_DIR}`, and `${SHARED_DIR}` variables described in the Home page.
+Additionally, it stores the currently-active hostfile and the set of repos to search for pkgs.
+This file essentially determines where Jarvis will store metadata for pipelines.
+
+The resource graph for the current machine is stored under `${JARVIS_ROOT}/config/resource_graph.yaml`.
+The resource graph stores information about the machine state, including hardware and networks.
+
+## `Pkg`s
+
+Jarvis-cd deploys complex applications. 
In Jarvis, we consider applications
+"`pkg`s". `Pkg`s can be connected to form a pipeline, which is a series of
+applications to deploy all at once.
+
+Jarvis has three general `pkg` types:
+
+1. Service: a program which runs forever, until forcibly stopped.
+2. Application: a program which runs to a definite completion.
+   For example, run IOR over OrangeFS, BeeGFS, and Hermes.
+3. Interceptor: Used to intercept code for a benchmark or storage system.
+
+## Pipelines
+
+Jarvis-cd provides a CLI to create pipelines. A pipeline specifies an
+ordered set of configured pkgs to execute. An example of a jarvis pipeline
+would be as follows:
+
+![example-pipeline](./images/pipeline.svg)
+
+## Service Pkgs
+
+Service pkgs represent long-running applications. An example would be a storage system.
+
+Storage systems have complex configurations:
+
+1. What nodes do metadata servers go on?
+2. What nodes do storage servers go on?
+3. What storage devices do we use?
+4. What interface should the storage device expose? Filesystem? Block device?
+5. What networking protocol do we use?
+6. How many threads to use?
+7. The list goes on and on.
+
+To make matters worse, each storage system has different ways to represent this
+information. Each machine has different paths, different network cards,
+different IPs, etc. How do we simplify the deployment of these complex systems?
+
+At the end of the day, there's no way to get around having a configuration
+file. For each storage system, we store configuration files. These
+files are stored in the `${PRIVATE_DIR}` and `${SHARED_DIR}` directories stored
+in the jarvis configuration.
+
+```bash
+jarvis ppl create ofs-global
+jarvis ppl append orangefs
+jarvis ppl start &
+jarvis ppl stop
+```
+
+## Application Pkgs
+
+Applications can include:
+
+- benchmarks
+- simulation codes
+
+```bash
+jarvis ppl create my-benchmark
+jarvis cd my-benchmark
+jarvis ppl append orangefs
+jarvis ppl append hermes
+jarvis ppl append ior
+```
+
+## Interceptor Pkgs
+
+A library can be used to intercept or monitor some functionality of an
+application. For example, a library may use LD_PRELOAD to intercept an
+application's I/O calls and route them to a storage system such as Hermes:
+
+```bash
+jarvis ppl create my-app
+jarvis cd my-app
+jarvis ppl append orangefs \
+  client_path=${HOME}/llogan/pfs
+jarvis ppl append hermes
+jarvis ppl append monitor
+jarvis ppl append qmcpack
+jarvis ppl run
+```
+
+## Pipeline Metadata
+
+When creating a pipeline, metadata for the pipeline will be created under
+the `${CONFIG_DIR}` from the jarvis config.
+
+The following prints the metadata directory of the currently-focused pipeline:
+
+```bash
+jarvis ppl create my-pipeline
+ls `jarvis path`
+```
+
+This will print all folders and files relevant to `my-pipeline`.
+
+When adding pkgs to the pipeline, sub-folders will be created for each individual pkg.
+For example, the following will print all files used by the hermes pkg.
+
+```bash
+jarvis ppl append hermes
+ls `jarvis path`/hermes
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/05-pipeline-scripts.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/05-pipeline-scripts.md
new file mode 100644
index 00000000..ed39cdc0
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/05-pipeline-scripts.md
@@ -0,0 +1,80 @@
+# Pipeline Scripts
+
+Pipeline scripts are useful for storing cross-platform unit tests.
+They store all of the information needed to create and execute
+a pipeline. 
+
+## Running a pipeline script
+
+Pipeline scripts are YAML files and can be executed as follows:
+```bash
+jarvis ppl load yaml /path/to/my_pipeline.yaml
+jarvis ppl run
+```
+
+Alternatively, if you want to load + run the script:
+```bash
+jarvis ppl run yaml /path/to/my_pipeline.yaml
+```
+
+## Example Pipeline Script
+
+Below is a small example of a file for testing block device I/O
+in a task-based I/O system named Chimaera.
+
+The script is named ``test_bdev_io.yaml``.
+
+```yaml
+name: chimaera_unit_ipc
+env: chimaera
+pkgs:
+  - pkg_type: chimaera_run
+    pkg_name: chimaera_run
+    sleep: 10
+    do_dbg: true
+    dbg_port: 4000
+  - pkg_type: chimaera_unit_tests
+    pkg_name: chimaera_unit_tests
+    TEST_CASE: TestBdevIo
+    do_dbg: true
+    dbg_port: 4001
+```
+
+## name: chimaera_unit_ipc
+
+The name of the pipeline that jarvis references.
+
+The following command would focus the pipeline in jarvis:
+```bash
+jarvis cd chimaera_unit_ipc
+```
+
+## env: chimaera
+
+This field loads a named environment file.
+It expects the environment to already exist.
+
+In this example, the environment is expected to
+be named ``chimaera``:
+
+```bash
+jarvis env build chimaera
+```
+
+When you run ``jarvis ppl load yaml test_bdev_io.yaml``,
+the environment chimaera will be automatically loaded.
+
+## pkgs:
+
+In this section, we define the parameters to each package
+in the pipeline.
+
+Here, we have two packages, chimaera_run (the server) and
+chimaera_unit_tests (the client).
+
+When you run ``jarvis ppl load yaml test_bdev_io.yaml``,
+the following commands will be executed internally by Jarvis:
+```bash
+jarvis ppl append chimaera_run sleep=10 +do_dbg dbg_port=4000
+jarvis ppl append chimaera_unit_tests +do_dbg dbg_port=4001 TEST_CASE=TestBdevIo
+```
\ No newline at end of file
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/06-pipeline-tests.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/06-pipeline-tests.md
new file mode 100644
index 00000000..dfc8acec
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/06-pipeline-tests.md
@@ -0,0 +1,130 @@
+# Pipeline Tests
+
+Pipeline tests are used to run experiment sets using
+a grid search.
+
+## Example File
+
+Below is an example of a pipeline for running various
+configurations of spark KMeans.
+
+```yaml
+config:
+  name: mm_kmeans_spark
+  env: mega_mmap
+  pkgs:
+    - pkg_type: spark_cluster
+      pkg_name: spark_cluster
+      num_nodes: 1
+    - pkg_type: mm_kmeans_df
+      pkg_name: mm_kmeans_df
+      path: ${HOME}/mm_data/parquet/kmeans.parquet
+      window_size: 4g
+      df_size: 4g
+      nprocs: 1
+      ppn: 16
+      type: parquet
+      k: 1000
+    - pkg_type: mm_kmeans
+      pkg_name: mm_kmeans
+      path: ${HOME}/mm_data/parquet/*
+      window_size: 30g
+      api: spark
+      max_iter: 4
+      k: 8
+      do_dbg: False
+      dbg_port: 4001
+vars:
+  mm_kmeans_df.window_size: [16m, 64m, 128m, 1g, 2g, 4g]
+  mm_kmeans_df.df_size: [16m, 64m, 128m, 1g, 2g, 4g]
+  spark_cluster.num_nodes: [4]
+loop:
+  - [mm_kmeans_df.window_size, mm_kmeans_df.df_size]
+  - [spark_cluster.num_nodes]
+repeat: 1
+output: "${SHARED_DIR}/output_multi"
+```
+
+## config:
+
+This section is the skeleton of a pipeline. It has exactly the same parameters
+as a [pipeline script](05-pipeline-scripts.md).
+
+In this example, the pipeline will be called mm_kmeans_spark and will launch a spark
+cluster, a dataset generator, and spark kmeans, in that order.
+
+## vars:
+
+Each pkg in the pipeline has a set of variables it exposes. 
In this example,
+we vary the dataset size, a window size, and the number of nodes in the spark cluster.
+
+The syntax of a variable is: ``pkg_name.var_name``
+
+## loop:
+
+This represents what the test loop should look like. In pseudocode,
+the above loop would translate to Python roughly as follows:
+
+```python
+mm_kmeans_df_window_size = ['16m', '64m', '128m', '1g', '2g', '4g']
+mm_kmeans_df_df_size = ['16m', '64m', '128m', '1g', '2g', '4g']
+spark_cluster_num_nodes = [4]
+for window_size, df_size in zip(mm_kmeans_df_window_size, mm_kmeans_df_df_size):
+    for num_nodes in spark_cluster_num_nodes:
+        mm_kmeans_spark.configure(window_size, df_size, num_nodes)
+```
+
+In this example, a total of 6 cases are executed:
+```
+16m 16m 4
+64m 64m 4
+128m 128m 4
+1g 1g 4
+2g 2g 4
+4g 4g 4
+```
+
+By having a separate loop section, you can define certain variables as varying together or independently
+to reduce the number of total test cases. In this example, ``mm_kmeans_df.window_size`` and ``mm_kmeans_df.df_size``
+vary together, but independently from ``spark_cluster.num_nodes``.
+
+``mm_kmeans_df.window_size`` and ``mm_kmeans_df.df_size`` must have the same size (in this case 6).
+
+## repeat:
+
+The number of times each experiment should be conducted. For example,
+this can be used to calculate the average across experiment runs to
+get a better understanding of variability and noise in your study.
+
+In this example, experiments are only conducted once.
+
+## output:
+
+This is the directory where the results are stored. Note that jarvis stores
+the pipeline's shared directory, private directory, and configuration directory
+in the following three environment variables: ``${SHARED_DIR}``, ``${PRIVATE_DIR}``,
+and ``${CONFIG_DIR}``.
+
+By default, the output of this is going to be a dataset with each variable as a parameter:
+```
+[mm_kmeans_df.window_size] [mm_kmeans_df.df_size] [spark_cluster.num_nodes]
+```
+
+To get more columns, pkgs can define a custom ``_get_stat()`` function. This is more
+for developers than users. Below is an example of a custom stat for the YCSB benchmark,
+which analyzes the output of YCSB for its throughput and total runtime.
+```python
+import re
+
+class Ycsb:
+    def _get_stat(self, stat_dict):
+        """
+        Get statistics from the application.
+
+        :param stat_dict: A dictionary of statistics.
+        :return: None
+        """
+        output = self.exec.stdout['localhost']
+        if 'throughput(ops/sec)' in output:
+            throughput = re.search(r'throughput\(ops\/sec\): ([0-9.]+)', output).group(1)
+            stat_dict[f'{self.pkg_id}.throughput'] = throughput
+            stat_dict[f'{self.pkg_id}.runtime'] = self.start_time
+```
\ No newline at end of file
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/07-pipeline-indexes.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/07-pipeline-indexes.md
new file mode 100644
index 00000000..28cd658b
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/07-pipeline-indexes.md
@@ -0,0 +1,136 @@
+# Pipeline Indexes
+
+Pipeline indexes are folders containing pipeline scripts.
+They can be used to disseminate working examples of your code.
+For example, pipeline scripts used for unit tests would be good to have
+in a pipeline index.
+
+## Adding a Pipeline Index
+
+Pipeline indexes are stored within repos as a subdirectory named
+``pipelines``. It is required to be named ``pipelines``.
+
+Below is an example structure of a jarvis repo containing a pipeline index.
+```bash
+jarvis_chimaera     # Repo
+├── jarvis_chimaera # Jarvis Packages
+│   ├── chimaera_bw_bench
+│   ├── chimaera_docker
+│   ├── chimaera_latency_bench
+│   ├── chimaera_run
+│   ├── chimaera_unit_tests
+│   └── chimaera_zlib_bench
+└── pipelines       # Pipeline Index
+    ├── bench_bw_ipc.yaml
+    ├── bench_latency_ipc.yaml
+    ├── test_bdev_io.yaml
+    ├── test_bdev_ram.yaml
+    ├── test_bulk_ipc.yaml
+    ├── test_bulk_read_ipc.yaml
+    ├── test_bulk_write_ipc.yaml
+    ├── test_compress.yaml
+    ├── test_ipc_rocm.yaml
+    ├── test_ipc.yaml
+    ├── test_python.yaml
+    ├── test_serialize.yaml
+    └── test_upgrade.yaml
+```
+
+Below is another example with an index containing subdirectories:
+```bash
+jarvis_hermes    # Repo
+├── jarvis_hermes # Jarvis Packages
+│   ├── hermes_api
+│   │   ├── pkg.py
+│   │   └── README.md
+│   ├── hermes_api_bench
+│   │   ├── pkg.py
+│   │   └── README.md
+└── pipelines    # Pipeline Index
+    ├── hermes
+    │   └── test_hermes.yaml
+    ├── mpiio
+    │   ├── test_hermes_mpiio_basic_async.yaml
+    │   ├── test_hermes_mpiio_basic_sync.yaml
+    │   └── test_mpiio_basic.yaml
+    ├── nvidia_gds
+    │   ├── test_hermes_nvidia_gds.yaml
+    │   └── test_nvidia_gds_basic.yaml
+    ├── posix
+    │   ├── test_hermes_posix_basic_large.yaml
+    │   ├── test_hermes_posix_basic_mpi_large.yaml
+    │   ├── test_hermes_posix_basic_mpi_small.yaml
+    ├── stdio
+    │   ├── test_hermes_stdio_adapter_bypass.yaml
+    │   ├── test_hermes_stdio_adapter_default.yaml
+    │   ├── test_hermes_stdio_adapter_scratch.yaml
+    │   ├── test_hermes_stdio_basic_large.yaml
+    ├── test_borg.yaml
+    ├── test_ior.yaml
+    └── vfd
+        ├── test_hermes_vfd_basic.yaml
+        ├── test_hermes_vfd_python.yaml
+        ├── test_hermes_vfd_scratch.yaml
+        └── test_vfd_python.yaml
+```
+
+## List indexes
+
+Since pipeline indexes are stored in repos, simply list
+the repos:
+```bash
+jarvis repo list
+```
+
+## Index Queries
+
+Several of the commands below take the parameter ``[index_query]``.
+An index query is a dotted string in the following format:
+```
+[repo_name].[subdir1]...[subdirN].[script]
+```
+
+For example:
+```
+jarvis_chimaera.bench_bw_ipc
+jarvis_hermes.hermes.test_hermes
+```
+
+NOTE: index queries do not include file extensions.
+
+## Use a script from an index
+
+To load a pipeline script stored in an index directly, you
+can do:
+
+```bash
+jarvis ppl index load [index_query]
+```
+
+For example:
+```bash
+jarvis ppl index load jarvis_chimaera.bench_bw_ipc
+jarvis ppl index load jarvis_hermes.hermes.test_hermes
+```
+
+## Copy a script from an index
+
+You can copy a pipeline script from an index to your current
+directory or some other directory. You can then edit the
+script's parameters and call ``jarvis ppl load yaml``
+on your modified script.
+
+To copy a script from an index:
+```bash
+jarvis ppl index copy [index_query] [output (optional)]
+```
+
+Parameters:
+* index_query: a dotted string indicating the script in the index to copy
+* output: a directory or file to copy the script to. If output is not provided,
+the script is copied to the current working directory.
+
+For example:
+```bash
+jarvis ppl index copy jarvis_chimaera.bench_bw_ipc
+jarvis ppl index copy jarvis_hermes.hermes.test_hermes
+```
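+
+Putting these together, a common workflow is to copy a script out of an
+index, tweak its parameters, and load the modified copy. A sketch (the
+copied file name is assumed to match the index script):
+```bash
+jarvis ppl index copy jarvis_chimaera.bench_bw_ipc
+nano bench_bw_ipc.yaml   # edit package parameters
+jarvis ppl load yaml bench_bw_ipc.yaml
+jarvis ppl run
+```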
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/08-custom-repos.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/08-custom-repos.md
new file mode 100644
index 00000000..c22df5f3
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/08-custom-repos.md
@@ -0,0 +1,78 @@
+# Custom Repos
+
+There are cases where your organization has packages that are used internally
+and should not have to wait to be integrated into the builtin repo.
+
+## Repo structure
+
+Custom repos have the following structure:
+
+```
+my_org_name
+└── my_org_name
+    └── orangefs
+        └── package.py
+```
+
+## Register a custom repo
+
+You can register the repo as follows:
+
+```bash
+jarvis repo add /path/to/my_org_name
+```
+
+Whenever a new repo is added, it becomes the first place
+jarvis searches for pkgs.
+
+## Creating pkgs from a template
+
+You can then add pkgs to the repo as follows:
+
+```bash
+jarvis repo create [name] [pkg_class]
+```
+
+pkg_class can be one of:
+
+- service
+- app
+- interceptor
+
+For example:
+
+```bash
+jarvis repo create hermes service
+```
+
+The repo will then look as follows:
+
+```
+my_org_name
+└── my_org_name
+    ├── hermes
+    │   └── package.py
+    └── orangefs
+        └── package.py
+```
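+
+For orientation, the generated ``package.py`` roughly follows the hook
+structure documented in [Building a Package](09-building-package.md). A
+minimal sketch of a service pkg (the exact template and import path that
+``jarvis repo create`` produces may differ):
+
+```python
+from jarvis_cd.basic.pkg import Service  # import path is an assumption
+
+
+class Hermes(Service):
+    def _init(self):
+        # Declare config variables; defaults should almost always be None
+        self.daemon_pkg = None
+
+    def _configure_menu(self):
+        # CLI options accepted by `jarvis pkg configure hermes ...`
+        return [{'name': 'port', 'msg': 'The port to listen on',
+                 'type': int, 'default': 8080}]
+
+    def configure(self, **kwargs):
+        # Persist options and generate app-specific config files
+        self.update_config(kwargs, rebuild=False)
+
+    def start(self):
+        pass  # launch the service
+
+    def stop(self):
+        pass  # terminate the service
+
+    def clean(self):
+        pass  # destroy intermediate data
+
+    def status(self):
+        return True  # report whether the service is running
+```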
+
+## Promoting a repo
+
+Jarvis searches repos in a certain order. To make a repo the first place
+that jarvis searches, run:
+
+```bash
+jarvis repo promote [repo_name]
+```
+
+## Remove a repo from consideration
+
+Sometimes a repo needs to be removed entirely from consideration.
+To do this, run:
+
+```bash
+jarvis repo remove [repo_name]
+```
+
+This will not destroy the contents of the repo; it simply unregisters
+the repo from Jarvis.
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/09-building-package.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/09-building-package.md
new file mode 100644
index 00000000..fcc72d37
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/09-building-package.md
@@ -0,0 +1,450 @@
+# Building a Package
+
+This guide documents how to extend the set of applications that Jarvis is
+able to deploy. We refer to these as packages (pkgs for short).
+
+## Bootstrap a `Pkg`
+
+You can bootstrap a pkg to the primary repo as follows:
+
+```bash
+jarvis repo create [name] [pkg_class]
+```
+
+`pkg_class` can be one of:
+
+- service
+- app
+- interceptor
+
+For example:
+
+```bash
+jarvis repo create hermes service
+jarvis repo create hermes_mpiio interceptor
+jarvis repo create gray_scott app
+```
+
+We can then create an example pipeline as follows:
+
+```bash
+jarvis ppl create test
+jarvis ppl append hermes
+jarvis ppl append hermes_mpiio
+jarvis ppl append gray_scott
+```
+
+This example pipeline will deploy Hermes, the Hermes MPI-IO
+interceptor, and Gray Scott, an application which performs I/O using
+MPI.
+
+## The `Pkg` Base Class
+
+This section goes over the variables and methods common across all Pkg types.
+These variables are initialized automatically.
+
+```python
+class Pkg:
+    def __init__(self):
+        self.pkg_dir = '...'
+        self.shared_dir = '...'
+        self.private_dir = '...'
+        self.env = {}
+        self.config = {}
+        self.global_id = '...'
+        self.pkg_id = '...'
+```
+
+### `pkg_id` and `global_id`
+
+The Global ID (global_id) is the globally unique ID of a package across all of
+jarvis. It is a dot-separated string. Typically, the format is:
+
+```
+{pipeline_id}.{pkg_id}
+```
+
+The Package ID (pkg_id) is the unique ID of the package relative to a pipeline.
+This is a simple string (no dots).
+
+For example, in the bootstrap example above, we appended 3 packages: hermes,
+hermes_mpiio, and gray_scott; these are also the pkg_ids. The
+global_ids would be:
+
+```
+test.hermes
+test.hermes_mpiio
+test.gray_scott
+```
+
+Usage:
+
+```
+self.global_id
+self.pkg_id
+```
+
+### `pkg_dir`
+
+The package directory is the location of the pkg's python class file on the filesystem.
+For example, when calling `jarvis repo create hermes`, the directory
+created by this command will be the pkg_dir.
+
+One use case for the pkg_dir is creating template configuration files.
+For example, OrangeFS has a complex XML configuration which would be a pain
+to reproduce in Python. One could include an OrangeFS XML config in their
+package directory and commit it as part of their Jarvis repo.
+
+Usage:
+
+```
+self.pkg_dir
+```
+
+### `shared_dir`
+
+The shared_dir is a directory stored on a filesystem common across all nodes
+in the hostfile. Each node has the same view of data in the shared_dir. The
+shared_dir contains data specific to each pkg to avoid conflicts in
+a pipeline with multiple pkgs.
+
+For example, when deploying Hermes, we assume that each node has the Hermes
+configuration file, and each node is expected to have the same configuration file.
+We store the Hermes config in the shared_dir.
+
+Usage:
+
+```
+self.shared_dir
+```
+
+### `private_dir`
+
+This is a directory whose path is common across all nodes, but nodes do not
+have the same view of its data.
+
+For example, when deploying OrangeFS, each node must have a file
+called pvfs2tab. It essentially stores the protocol + address that OrangeFS
+uses for networking. However, the content of this file is different on
+each node. Storing it in the shared_dir would be incorrect, which is why we
+need the private_dir.
+
+Usage:
+
+```
+self.private_dir
+```
+
+### `env`
+
+Jarvis pipelines store the current environment in a YAML file, which represents
+a python dictionary. The key is the environment variable name (string) and the
+value is the variable's value. There is a single environment
+used for the entire pipeline. Each pipeline stores its own environment to avoid
+conflicts.
+
+Usage:
+
+```
+self.env['VAR_NAME']
+```
+
+Environments can be modified using various helper functions:
+
+```
+self.track_env(env_track_dict)
+self.prepend_env(env_name, val)
+self.setenv(env_name, val)
+```
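+
+For example, a pkg's `configure` might use these helpers as follows
+(the variable names and values here are purely illustrative):
+
+```
+self.setenv('HERMES_CONF', f'{self.shared_dir}/hermes_server.yaml')
+self.prepend_env('LD_LIBRARY_PATH', '/usr/local/lib')
+```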
+
+To view the env YAML file for the current pipeline from the CLI:
+
+```
+cat `jarvis path`/env.yaml
+```
+
+### `config`
+
+The Jarvis configuration is stored in `{pkg_dir}/{pkg_id}.yaml`.
+Unlike the environment dict, this stores variables that are specific to
+the package. They are not global to the pipeline.
+
+For example, OrangeFS and Hermes need to know the desired port number and
+RPC protocol. This information is specific to the program, not the entire
+pipeline.
+
+Usage:
+
+```
+self.config['VAR_NAME']
+```
+
+### `jarvis`
+
+The Jarvis CD configuration manager stores various properties global to
+all of Jarvis. The most important information is the hostfile and the
+resource_graph, discussed in the next sections.
+
+Usage:
+
+```
+self.jarvis
+```
+
+### `hostfile`
+
+The hostfile contains the set of all hosts that Jarvis has access to.
+The hostfile format is documented [here](https://github.com/scs-lab/jarvis-util/wiki/4.-Hostfile).
+
+Usage:
+
+```
+self.jarvis.hostfile
+```
+
+### `resource_graph`
+
+The resource graph can be queried for storage and networking information,
+e.g. to find places to store large volumes of data.
+
+```
+self.jarvis.resource_graph
+```
+
+## Building a Service or Application
+
+Services and Applications implement the same interface but are logically
+slightly different. A service is long-running and would typically require
+the user to manually stop it. An application stops automatically when it
+finishes its work. Jarvis can deploy services alongside
+applications to avoid the manual stop when benchmarking.
+
+### `_init`
+
+The Jarvis constructor (`_init`) is used to initialize global variables; do
+not assume that self.config is initialized here. Its main purpose is to
+provide an overview of the parameters of the class.
+Default values should almost always be None.
+
+```python
+def _init(self):
+    self.gray_scott_path = None
+```
+
+### `_configure_menu`
+
+This function defines the set of command line options that the user can set.
+An example configure menu is below:
+
+```python
+def _configure_menu(self):
+    """
+    Create a CLI menu for the configurator method.
+    For thorough documentation of these parameters, view:
+    https://github.com/scs-lab/jarvis-util/wiki/3.-Argument-Parsing
+
+    :return: List(dict)
+    """
+    return [
+        {
+            'name': 'port',
+            'msg': 'The port to listen for data on',
+            'type': int,
+            'default': 8080
+        }
+    ]
+```
+
+This function is called whenever configuring a package. For example,
+
+```bash
+jarvis pkg configure hermes --sleep=10 --port=25
+```
+
+This will configure hermes to sleep for 10 seconds after launching, to give it enough
+time to fully start. Sleep is a part of all configure menus by default.
+
+The format of the output dict is documented in more detail
+[here](https://github.com/scs-lab/jarvis-util/wiki/3.-Argument-Parsing).
+
+### `configure`
+
+The `configure` function takes as input a
+dictionary whose keys are determined by the \_configure_menu function's
+output. It is responsible for updating the self.config variable appropriately
+and generating the application-specific configuration files.
+
+Below is an example for Hermes. This example takes as input the port option,
+builds the hermes_server dict, and then stores the dict in a YAML file
+in the shared directory.
+
+```python
+def configure(self, **kwargs):
+    """
+    Converts the Jarvis configuration to application-specific configuration.
+    E.g., OrangeFS produces an orangefs.xml file.
+
+    :param config: The human-readable jarvis YAML configuration for the
+    application.
+    :return: None
+    """
+    self.update_config(kwargs, rebuild=False)
+    hermes_server_conf = {
+        'port': self.config['port']
+    }
+    YamlFile(f'{self.shared_dir}/hermes_server_yaml').save(hermes_server_conf)
+```
+
+This function is called whenever configuring a package. Specifically, it is
+called immediately after \_configure_menu. For example,
+
+```
+jarvis pkg configure hermes --sleep=10 --port=25
+```
+
+will make the kwargs dict be:
+
+```python
+{
+    'sleep': 10,
+    'port': 25
+}
+```
+
+### `start`
+
+The start function is called during `jarvis ppl run` and `jarvis ppl start`.
+This function should execute the program itself.
+
+Below is an example for Hermes:
+
+```python
+def start(self):
+    """
+    Launch an application. E.g., OrangeFS will launch the servers, clients,
+    and metadata services on all necessary pkgs.
+
+    :return: None
+    """
+    self.daemon_pkg = Exec('hermes_daemon',
+                           PsshExecInfo(hostfile=self.jarvis.hostfile,
+                                        env=self.env,
+                                        exec_async=True))
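+    # Exec launches hermes_daemon on every host in the hostfile via PSSH.
+    # exec_async=True returns immediately, leaving the daemon running in the
+    # background; stop() later waits on this handle via self.daemon_pkg.wait().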
+    time.sleep(self.config['sleep'])
+    print('Done sleeping')
+```
+
+### `stop`
+
+The stop function is called during `jarvis ppl run` and `jarvis ppl stop`.
+This function should terminate the program.
+
+Below is an example for Hermes:
+
+```python
+def stop(self):
+    """
+    Stop a running application. E.g., OrangeFS will terminate the servers,
+    clients, and metadata services.
+
+    :return: None
+    """
+    Exec('finalize_hermes',
+         PsshExecInfo(hostfile=self.jarvis.hostfile,
+                      env=self.env))
+    if self.daemon_pkg is not None:
+        self.daemon_pkg.wait()
+    Kill('hermes_daemon',
+         PsshExecInfo(hostfile=self.jarvis.hostfile,
+                      env=self.env))
+```
+
+This is not typically implemented for Applications, but it is for Services.
+
+### `clean`
+
+The `clean` function is called during `jarvis ppl clean`.
+It clears all intermediate data produced by a pipeline.
+
+Below is the prototype:
+
+```python
+def clean(self):
+    """
+    Destroy all data for an application. E.g., OrangeFS will delete all
+    metadata and data directories in addition to the orangefs.xml file.
+
+    :return: None
+    """
+    pass
+```
+
+### `status`
+
+The `status` function is called during `jarvis ppl status`.
+It determines whether or not a service is running. This is not typically
+implemented for Applications, but it is for Services.
+
+## Building an Interceptor
+
+Interceptors are used to modify environment variables to route system and library
+calls to new functions.
+
+Interceptors have a slightly different interface -- they only have
+`_init`, `_configure_menu`, `configure`, and `modify_env`. The only new function
+here is modify_env. The others were defined in the previous section and behave
+exactly the same way.
+
+### `configure`
+
+Configuring an interceptor tends to be a little different. Interceptors
+are not typically responsible for generating configuration files the way
+applications and services are; they are typically responsible solely for
+modifying the environment.
+
+Below, we show an example of configure for the Hermes MPI I/O interceptor:
+
+```python
+def configure(self, **kwargs):
+    """
+    Converts the Jarvis configuration to application-specific configuration.
+    E.g., OrangeFS produces an orangefs.xml file.
+
+    :param kwargs: Configuration parameters for this pkg.
+    :return: None
+    """
+    self.update_config(kwargs, rebuild=False)
+    self.config['HERMES_MPIIO'] = self.find_library('hermes_mpiio')
+    if self.config['HERMES_MPIIO'] is None:
+        raise Exception('Could not find hermes_mpiio')
+    print(f'Found libhermes_mpiio.so at {self.config["HERMES_MPIIO"]}')
+```
+
+Here we use self.find_library() to check whether we can find the shared library
+hermes_mpiio in the system paths. This function introspects LD_LIBRARY_PATH
+and determines if hermes_mpiio is in the path. It saves the path in the pkg
+configuration (self.config).
+
+### `modify_env`
+
+Below is an example of the MPI I/O interceptor for Hermes:
+
+```python
+def modify_env(self):
+    """
+    Modify the jarvis environment.
+
+    :return: None
+    """
+    self.prepend_env('LD_PRELOAD', self.config['HERMES_MPIIO'])
+```
+
+## A Note on `jarvis-util`
+
+`jarvis-cd` aims to provide structure for storing the configuration files needed
+by complex benchmarks.
+
+`jarvis-util` is primarily responsible for handling program execution. This
+includes things like executing MPI and PSSH commands from Python. This is where the
+`Exec` and `PsshExecInfo` data structures come from. More information
+on `jarvis-util` can be found [here](https://github.com/scs-lab/jarvis-util/wiki).
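+
+As a quick illustration of that API (a sketch only; the import path below is
+an assumption, so consult the jarvis-util wiki for the real one):
+
+```python
+from jarvis_util import Exec, PsshExecInfo  # NOTE: import path is an assumption
+
+def status(self):
+    """Sketch: check for the daemon on every host in the hostfile."""
+    node_info = Exec('pgrep -x hermes_daemon',
+                     PsshExecInfo(hostfile=self.jarvis.hostfile,
+                                  env=self.env))
+    # stdout is keyed by host, as in the YCSB _get_stat example earlier
+    return any(pids.strip() for pids in node_info.stdout.values())
+```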
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/10-python-api.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/10-python-api.md
new file mode 100644
index 00000000..3276d14f
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/10-python-api.md
@@ -0,0 +1,128 @@
+# Python API
+
+This guide documents how to use Jarvis within a Python script. To this point,
+we have demonstrated the Jarvis CLI. However, the Python API can be used
+for building more complex benchmarks.
+
+## Importing Jarvis CD
+
+```python
+from jarvis_cd.basic.pkg import Pipeline
+```
+
+## Creating a Pipeline
+
+To create a pipeline and save its environment:
+
+USAGE:
+
+```python
+pipeline = Pipeline().create(pipeline_id).build_env().save()
+```
+
+For example:
+
+```python
+pipeline = Pipeline().create('gs-hermes').build_env().save()
+```
+
+NOTE: `create()` will not override any data if the pipeline already exists.
+
+## Loading an Existing Pipeline
+
+USAGE:
+
+```python
+pipeline = Pipeline().load(pipeline_id=None)
+```
+
+The following will load the currently-focused pipeline:
+
+```python
+pipeline = Pipeline().load()
+```
+
+The following will load the pipeline with a particular name:
+
+```python
+pipeline = Pipeline().load('gs-hermes')
+```
+
+## Append Pkgs to a Pipeline
+
+USAGE:
+
+```python
+pipeline.append(pkg_type, pkg_id=None, do_configure=True, **kwargs)
+"""
+Create and append a pkg to the pipeline
+
+:param pkg_type: The type of pkg to create (e.g., OrangeFS)
+:param pkg_id: Semantic name of the pkg to create
+:param do_configure: Whether to configure while appending
+:param kwargs: Any parameters the user wants to configure in the pkg
+:return: self
+"""
+```
+
+The following will add Hermes to the pipeline with a sleep of 10:
+
+```python
+pipeline.append('hermes', sleep=10)
+pipeline.append('hermes_mpiio')
+pipeline.append('gray_scott')
+pipeline.save()
+```
+
+## Configure a `Pkg` in the Pipeline
+
+USAGE:
+
+```python
+pkg = pipeline.get_pkg(pkg_id)
+pkg.configure(**kwargs)
+```
+
+For example:
+
+```python
+pkg = pipeline.get_pkg('hermes')
+pkg.configure(sleep=5).save()
+```
+
+## Unlink/Remove `Pkg`s from a Pipeline
+
+Unlink simply removes the pkg from the Jarvis config, but does not
+destroy its contents. Unlinked pkgs can be re-linked using append without
+losing their configuration data.
+
+Remove erases the pkg from the filesystem entirely.
+
+USAGE:
+
+```python
+pipeline.remove(pkg_id).save()
+pipeline.unlink(pkg_id).save()
+```
+
+For example:
+
+```python
+pipeline.remove('hermes').save()
+```
+
+## Run a Pipeline
+
+To run the Pipeline end-to-end:
+
+```python
+pipeline.run()
+```
+
+## Destroy a Pipeline
+
+To destroy a Pipeline:
+
+```python
+pipeline.destroy()
+```
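+
+Putting the calls above together, a complete script might look like this
+(pkg names reuse the earlier Hermes/Gray Scott example):
+
+```python
+from jarvis_cd.basic.pkg import Pipeline
+
+# Build the pipeline, attach pkgs, run it end-to-end, then tear it down.
+pipeline = Pipeline().create('gs-hermes').build_env().save()
+pipeline.append('hermes', sleep=10)
+pipeline.append('hermes_mpiio')
+pipeline.append('gray_scott')
+pipeline.save()
+pipeline.run()
+pipeline.destroy()
+```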
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/11-schedulers.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/11-schedulers.md
new file mode 100644
index 00000000..7fe92ca0
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/11-schedulers.md
@@ -0,0 +1,46 @@
+# Schedulers
+
+Jarvis-CD provides native support for two popular job scheduling systems: SLURM and PBS. This documentation guides users on how to utilize both schedulers through Jarvis-CD.
+
+## SLURM Scheduler
+
+Jarvis-CD integrates with the SLURM scheduler through the pipeline sbatch menu. You can specify job parameters using the following options:
+
+- **job_name** (required): Name of the job.
+- **nnodes** (required): Number of nodes to execute the pipeline on.
+- ppn: Number of processes per node.
+- cpus_per_task: Number of processors required per task.
+- time: Maximum time allotted to the job.
+- partition: The partition in which to allocate nodes (default is compute).
+- mail_type: When to email users about the job status. Choices include: NONE, BEGIN, END, FAIL, REQUEUE, ALL.
+- mail_user: Email address for job notifications.
+- output_file: File path to write all output messages.
+- error_file: File path to write all error messages.
+- memory: Amount of memory to request for the job.
+- gres: List of generic consumable resources (like GPUs).
+- exclusive: Request nodes exclusively (default is True).
+
+To run a job using SLURM:
+
+```bash
+jarvis ppl sbatch job_name=test nnodes=4
+```
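+
+Additional options follow the same key=value form, for example
+(the values here are illustrative):
+
+```bash
+jarvis ppl sbatch job_name=test nnodes=4 ppn=16 time=01:00:00 partition=compute mail_type=END mail_user=user@example.com
+```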
+
+## PBS Scheduler
+
+Jarvis-CD also supports the PBS scheduler through the pipeline pbs menu. The following options are available:
+
+- **nnodes** (required): Number of nodes to execute the pipeline on (default is 1).
+- system: Type of system to allocate the nodes on (default is polaris).
+- filesystems: Filesystem to be used, e.g., home:grand (default is home:grand).
+- walltime: Maximum time allotted to the job (default is 00:10:00).
+- account: Account used for job submission (default is VeloC).
+- queue: Queue in which to submit the job (default is debug-scaling).
+- interactive: Submit the job in interactive mode (default is False).
+- env_vars: Environment variables to pass through PBS. Format: comma-separated list of strings like variable or variable=value.
+
+To run a job using PBS:
+
+```bash
+jarvis ppl pbs nnodes=2 system=other_system
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/_category_.yml b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/_category_.yml
new file mode 100644
index 00000000..2d04cc7c
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/_category_.yml
@@ -0,0 +1 @@
+label: "Packages"
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/cm1.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/cm1.md
new file mode 100644
index 00000000..7ea5ef36
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/cm1.md
@@ -0,0 +1,70 @@
+# CM1
+
+## Dependencies
+
+```bash
+spack install intel-oneapi-compilers
+spack load intel-oneapi-compilers
+spack compilers add
+spack install h5z-zfp%intel
+```
+
+## Compiling/Installing
+
+```bash
+git clone git@github.com:lukemartinlogan/cm1r19.8-LOFS.git
+cd cm1r19.8-LOFS
+# COREX * COREY is the number of cores you intend to use on the system
+# They do not need to be 2 and 2 here, but this is how our configurations are compiled for now
+COREX=2 COREY=2 bash buildCM1-spack.sh
+export PATH=${PWD}/run:${PATH}
+export CM1_PATH=${PWD}
+```
+
+## General Usage
+
+```bash
+mpirun -n [COREX * COREY] ${CM1_PATH}/run/cm1.exe [namelist.input] [output_dir] [filename_base] [restart_dir]
+```
+
+- output_dir: the directory where simulation output goes
+- filename_base: the name of the simulation file to generate
+- restart_dir: a directory to store checkpoints for a restart (I believe)
+
+## Brief overview of `namelist.input`
+
+The following variables define the dimensions of a 3D grid:
+
+```
+nx = 16,
+ny = 16,
+nz = 16,
+```
+
+I set them lower to reduce extreme memory consumption in single-node cases.
+
+These variables must be set relative to COREX and COREY:
+
+```
+ nodex = 2, !nuke
+ nodey = 2, !nuke
+ rankx = 2,
+ ranky = 2,
+```
+
+You must satisfy the following constraints when setting these:
+
+1. rankx * ranky = COREX * COREY
+2. rankx > corex and ranky > corey
+
+## Radiative Convective Equilibrium
+
+```bash
+cd ${CM1_PATH}/run/config_files/cpm_RadConvEquil
+mkdir output
+mpirun -n 4 ${CM1_PATH}/run/cm1.exe ${CM1_PATH}/run/namelist.input.nssl3 output ex output
+```
+
+## Near Future
+
+Make a jarvis-cd package to automate all of these odd constraints for benchmarking and deployment.
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/deepdrivemd.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/deepdrivemd.md
new file mode 100644
index 00000000..cf03ca19
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/deepdrivemd.md
@@ -0,0 +1,326 @@
+# DeepDriveMD
+
+## Dependencies
+
+You can set up the environments in two ways:
+
+- [create the environments from config files](#prepare-conda-environment-from-config-files)
+- [build the conda environments from scratch](https://github.com/candiceT233/deepdrivemd_pnnl/blob/main/docs/conda_env/README.md)
+
+### Prepare Conda Environment from Config Files
+
+#### Prepare Conda
+
+Get the `miniconda3` installation script and run it:
+
+```
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+bash Miniconda3-latest-Linux-x86_64.sh
+```
+
+The most recent conda version tested to work is `conda 23.3.1`.
+
+#### First, git clone this repo and save it to `$DDMD_PATH`
+
+```
+export CONDA_OPENMM=openmm7_ddmd
+export CONDA_PYTORCH=ddmd_pytorch
+export DDMD_PATH=${PWD}/deepdrivemd
+export MOLECULES_PATH=$DDMD_PATH/submodules/molecules
+git clone --recursive https://github.com/candiceT233/deepdrivemd_pnnl.git $DDMD_PATH
+cd $DDMD_PATH
+```
+
+#### Create the two conda environments
+
+Name the two environments `$CONDA_OPENMM` and `$CONDA_PYTORCH`:
+
+```
+cd $DDMD_PATH
+conda env create -f ${DDMD_PATH}/docs/conda_env/ddmd_openmm7.yaml --name=${CONDA_OPENMM}
+conda env create -f ${DDMD_PATH}/docs/conda_env/ddmd_pytorch.yaml --name=${CONDA_PYTORCH}
+```
+
+If mdtools fails to install, that's OK; it will be handled in step 4.
+
+#### Update python packages in both conda environments
+
+Update CONDA_OPENMM:
+
+```
+source activate $CONDA_OPENMM
+cd $DDMD_PATH/submodules/MD-tools
+pip install .
+cd $DDMD_PATH/submodules/molecules
+pip install .
+conda deactivate
+```
+
+Update CONDA_PYTORCH:
+
+```
+source activate $CONDA_PYTORCH
+cd $DDMD_PATH/submodules/MD-tools
+pip install .
+cd $DDMD_PATH/submodules/molecules
+pip install .
+conda deactivate
+```
+
+## Hermes Dependencies
+
+### On Ares
+
+```
+module load hermes/pnnl-tz3s7yx
+```
+
+This automatically loads the Hermes build with VFD and its HDF5 dependency.
+
+### Personal Machine
+
+If building Hermes yourself, you need:
+
+- Sequential HDF5 >= 1.14.0
+- Hermes >= 1.0 with VFD and POSIX adapter support
+
+Build HDF5:
+
+```
+scspkg create hdf5
+cd `scspkg pkg src hdf5`
+git clone https://github.com/HDFGroup/hdf5.git -b hdf5_1_14_0
+cd hdf5
+mkdir build
+cd build
+cmake ../ -DHDF5_BUILD_HL_LIB=ON -DCMAKE_INSTALL_PREFIX=`scspkg pkg root hdf5`
+make -j8
+make install
+```
+
+Install Hermes with the custom HDF5:
+
+```
+spack install mochi-thallium~cereal@0.10.1 cereal catch2@3.0.1 mpich@3.3.2 yaml-cpp boost@1.7
+spack load mochi-thallium~cereal@0.10.1 cereal catch2@3.0.1 mpich@3.3.2 yaml-cpp boost@1.7
+module load hdf5
+```
+
+NOTE: this only needs to be done for the CONDA_OPENMM environment, since both environments use the same exact python version; HDF5 will be compiled the same. However, these commands must be executed before `source activate $CONDA_PYTORCH` to avoid overriding the python version.
+
+## Installation
+
+- `h5py==3.8.0` is required for `hdf5-1.14.0` and `Hermes>=1.0`
+- `pip install h5py==3.8.0` should be run after the deepdrivemd installation due to a version restriction with pip
+- make sure you have `hdf5-1.14.0` installed and added to $PATH before installing h5py (otherwise it will download hdf5-1.12.0 by default)
+
+```
+module load hdf5
+
+cd $DDMD_PATH
+source activate $CONDA_OPENMM
+pip install -e .
+pip uninstall h5py; pip install h5py==3.8.0
+conda deactivate
+
+source activate $CONDA_PYTORCH
+pip install -e .
+pip uninstall h5py; pip install h5py==3.8.0
+conda deactivate
+```
+
+## Usage
+
+Below describes running one iteration of the four-stage pipeline.
+Set up the experiment path in `$EXPERIMENT_PATH`; it will store all output files and log files from all stages.
+
+```bash
+EXPERIMENT_PATH=~/ddmd_runs
+mkdir -p $EXPERIMENT_PATH
+```
+
+---
+
+### Stage 1 : OPENMM
+
+Run code:
+
+```bash
+source activate $CONDA_OPENMM
+
+PYTHONPATH=$DDMD_PATH:$MOLECULES_PATH python $DDMD_PATH/deepdrivemd/sim/openmm/run_openmm.py -c $YAML_PATH/molecular_dynamics_stage_test.yaml
+```
+
+This stage runs the simulation; at a minimum, you have to run 12 simulation tasks for stages 3 & 4 to work.
+So you must run the above command at least 12 times, each time with a different `TASK_IDX_FORMAT` (see the loop sketch at the end of this stage).
+
+#### Environment variables note
+
+- `TASK_IDX_FORMAT`: gives a different task ID to each openmm task; it runs from `task0000` up to `task0011` for 12 tasks.
+- `SIM_LENGTH`: the simulation length; must be at least `0.1` for stages 3 & 4 to work.
+- `GPU_IDX`: set it to 0, since the GPU is not used.
+- `YAML_PATH`: the yaml file that contains the test configuration for the first stage.
+
+Set up environment variables and paths:
+
+```bash
+SIM_LENGTH=0.1
+GPU_IDX=0
+TASK_IDX_FORMAT="task0000"
+STAGE_IDX=0
+OUTPUT_PATH=$EXPERIMENT_PATH/molecular_dynamics_runs/stage0000/$TASK_IDX_FORMAT
+YAML_PATH=$DDMD_PATH/test/bba
+mkdir -p $OUTPUT_PATH
+```
+
+In the yaml file [`molecular_dynamics_stage_test.yaml`](https://github.com/candiceT233/deepdrivemd_pnnl/blob/main/test/bba/molecular_dynamics_stage_test.yaml), make sure to modify the following fields accordingly:
+
+```
+nano ${DDMD_PATH}/test/bba/molecular_dynamics_stage_test.yaml
+```
+
+```yaml
+experiment_directory: $EXPERIMENT_PATH
+stage_idx: $STAGE_IDX
+output_path: $OUTPUT_PATH
+pdb_file: $DDMD_PATH/data/bba/system/1FME-unfolded.pdb
+initial_pdb_dir: $DDMD_PATH/data/bba
+simulation_length_ns: $SIM_LENGTH
+reference_pdb_file: $DDMD_PATH/data/bba/1FME-folded.pdb
+gpu_idx: $GPU_IDX
+```
+
+Sample output under one task folder (12 task folders in total):
+
+```log
+ls -l $OUTPUT_PATH
+-rw-rw-r-- 1 username username  722 Aug 11 01:08 aggregate_stage_test.yaml
+-rw-rw-r-- 1 username username  786 Aug 10 21:50 molecular_dynamics_stage_test.yaml
+-rw-rw-r-- 1 username username 599K Aug 10 21:56 stage0000_task0000.dcd
+-rw-rw-r-- 1 username username 164K Aug 10 21:56 stage0000_task0000.h5
+-rw-rw-r-- 1 username username  39K Aug 10 21:50 system__1FME-unfolded.pdb
+```
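+
+To avoid repeating the steps above by hand 12 times, you could script them.
+A sketch (not part of the original workflow; it assumes `output_path` is the
+only per-task field and rewrites it with `sed`):
+
+```bash
+for i in $(seq 0 11); do
+  TASK_IDX_FORMAT=$(printf 'task%04d' "$i")
+  OUTPUT_PATH=$EXPERIMENT_PATH/molecular_dynamics_runs/stage0000/$TASK_IDX_FORMAT
+  mkdir -p "$OUTPUT_PATH"
+  # Point the stage YAML at this task's output folder
+  sed -i "s|^output_path:.*|output_path: $OUTPUT_PATH|" \
+      $YAML_PATH/molecular_dynamics_stage_test.yaml
+  PYTHONPATH=$DDMD_PATH:$MOLECULES_PATH python \
+      $DDMD_PATH/deepdrivemd/sim/openmm/run_openmm.py \
+      -c $YAML_PATH/molecular_dynamics_stage_test.yaml
+done
+```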
+
+---
+
+### Stage 2 : AGGREGATE
+
+Run code:
+
+```bash
+source activate $CONDA_OPENMM
+
+PYTHONPATH=$DDMD_PATH/ python $DDMD_PATH/deepdrivemd/aggregation/basic/aggregate.py -c $YAML_PATH/aggregate_stage_test.yaml
+```
+
+This stage only needs to be run once; it aggregates all the `stage0000_task0000.h5` files from the simulation into a single `aggregated.h5` file.
+
+Set up an output path different from the first openmm task folder:
+
+```bash
+OUTPUT_PATH=$EXPERIMENT_PATH/machine_learning_runs/stage0000/task0000
+
+mkdir -p $OUTPUT_PATH
+```
+
+In the yaml file [`aggregate_stage_test.yaml`](https://github.com/candiceT233/deepdrivemd_pnnl/blob/main/test/bba/aggregate_stage_test.yaml), make sure to modify the following fields accordingly:
+
+```yaml
+experiment_directory: $EXPERIMENT_PATH
+stage_idx: $STAGE_IDX
+pdb_file: $DDMD_PATH/data/bba/system/1FME-unfolded.pdb
+reference_pdb_file: $DDMD_PATH/data/bba/1FME-folded.pdb
+```
+
+Expected output:
+
+```log
+ls -l $OUTPUT_PATH | grep aggregated
+-rw-rw-r-- 1 username username 1.6M Aug 11 01:08 aggregated.h5
+```
+
+---
+
+### Stage 3 : TRAINING
+
+Run code:
+
+```bash
+source activate $CONDA_PYTORCH
+
+PYTHONPATH=$DDMD_PATH/:$MOLECULES_PATH python $DDMD_PATH/deepdrivemd/models/aae/train.py -c $YAML_PATH/training_stage_test.yaml
+```
+
+When the code runs, python might show warning messages; these can be ignored.
+
+Set up a different output path:
+
+```bash
+OUTPUT_PATH=$EXPERIMENT_PATH/machine_learning_runs/stage000$STAGE_IDX/$TASK_IDX_FORMAT
+
+mkdir -p $OUTPUT_PATH
+```
+
+In the yaml file [`training_stage_test.yaml`](https://github.com/candiceT233/deepdrivemd_pnnl/blob/main/test/bba/training_stage_test.yaml), make sure to modify the following fields accordingly:
+
+```yaml
+experiment_directory: $EXPERIMENT_PATH
+output_path: $OUTPUT_PATH
+```
+
+Expected output:
+
+```log
+ls -l $OUTPUT_PATH
+drwxrwxr-x 2 username username 4.0K Aug 11 01:09 checkpoint
+-rw-rw-r-- 1 username username 1.5M Aug 11 01:10 discriminator-weights.pt
+drwxrwxr-x 2 username username 4.0K Aug 11 01:10 embeddings
+-rw-rw-r-- 1 username username 2.0M Aug 11 01:10 encoder-weights.pt
+-rw-rw-r-- 1 username username 2.7M Aug 11 01:10 generator-weights.pt
+-rw-rw-r-- 1 username username 1.2K Aug 11 01:10 loss.json
+-rw-rw-r-- 1 username username  495 Aug 11 01:08 model-hparams.json
+-rw-rw-r-- 1 username username   82 Aug 11 01:08 optimizer-hparams.json
+-rw-rw-r-- 1 username username  884 Aug 11 01:08 training_stage_test.yaml
+-rw-rw-r-- 1 username username 1.3K Aug 11 01:08 virtual-h5-metadata.json
+-rw-rw-r-- 1 username username  10K Aug 11 01:08 virtual_stage0000_task0000.h5
+```
+
+---
+
+### Stage 4 : INFERENCE
+
+Run code:
+
+```bash
+source activate $CONDA_PYTORCH
+
+OMP_NUM_THREADS=4 PYTHONPATH=$DDMD_PATH/:$MOLECULES_PATH python $DDMD_PATH/deepdrivemd/agents/lof/lof.py -c $YAML_PATH/inference_stage_test.yaml
+```
+
+`OMP_NUM_THREADS` can be changed.
+
+Update environment variables:
+
+```bash
+STAGE_IDX=3
+
+OUTPUT_PATH=$EXPERIMENT_PATH/inference_runs/stage0000/$TASK_IDX_FORMAT
+
+mkdir -p $OUTPUT_PATH
+```
+
+In the yaml file [`inference_stage_test.yaml`](https://github.com/candiceT233/deepdrivemd_pnnl/blob/main/test/bba/inference_stage_test.yaml), make sure to modify the following fields accordingly:
+
+```yaml
+experiment_directory: $EXPERIMENT_PATH
+stage_idx: $STAGE_IDX
+output_path: $OUTPUT_PATH
+```
+
+Expected output files:
+
+```log
+ls -l $OUTPUT_PATH
+-rw-rw-r-- 1 username username  479 Aug 11 01:10 inference_stage_test.yaml
+-rw-rw-r-- 1 username username 1.5K Aug 11 01:10 virtual-h5-metadata.json
+-rw-rw-r-- 1 username username  18K Aug 11 01:10 virtual_stage0003_task0000.h5
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget2.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget2.md
new file mode 100644
index 00000000..dfff1759
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget2.md
@@ -0,0 +1,47 @@
+# Gadget2
+
+GADGET is a freely available code for cosmological N-body/SPH simulations on massively parallel computers with distributed memory. GADGET uses an explicit communication model that is implemented with the standardized MPI communication interface. The code can be run on essentially all supercomputer systems presently in use, including clusters of workstations or individual PCs.
+
+GADGET computes gravitational forces with a hierarchical tree algorithm (optionally in combination with a particle-mesh scheme for long-range gravitational forces) and represents fluids by means of smoothed particle hydrodynamics (SPH). The code can be used for studies of isolated systems, or for simulations that include the cosmological expansion of space, both with or without periodic boundary conditions.
+In all these types of simulations, GADGET follows the evolution of a self-gravitating collisionless N-body system, and allows gas dynamics to be optionally included. Both the force computation and the time stepping of GADGET are fully adaptive, with a dynamic range which is, in principle, unlimited.
+
+https://wwwmpa.mpa-garching.mpg.de/gadget/
+
+## Dependencies
+
+```bash
+spack install hdf5@1.14.1 gsl@2.1 fftw@2+mpi
+spack load hdf5@1.14.1 gsl@2.1 fftw@2+mpi
+scspkg create gadget2
+cd $(scspkg pkg src gadget2)
+git clone https://github.com/lukemartinlogan/gadget2.git
+cd gadget2
+mkdir build
+cd build
+```
+
+## NGenIC
+
+NGenIC is the initial-condition generation code.
+It can be used to generate an arbitrary initial condition file.
+
+```bash
+mpirun -n 8 build/bin/NGenIC N-GenIC/ics.param
+```
+
+## Manual Gassphere Example
+
+Compile:
+```bash
+cmake ../ \
+-DPEANOHILBERT=ON \
+-DWALLCLOCK=ON \
+-DSYNCHRONIZATION=ON
+make -j8
+```
+
+Run:
+```bash
+cd ../
+mkdir gassphere
+build/bin/Gadget2 Gadget2/parameterfiles/gassphere.param
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget4.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget4.md
new file mode 100644
index 00000000..3108e8c3
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/gadget4.md
@@ -0,0 +1,27 @@
+# Gadget4
+
+GADGET-4 is a massively parallel code for N-body/hydrodynamical cosmological
+simulations. It is a flexible code that can be applied to a variety of different
+types of simulations, offering a number of sophisticated simulation algorithms.
+An account of the numerical algorithms employed by the code is given in the
+original code paper, subsequent publications, and this documentation.
+
+GADGET-4 was written mainly by Volker Springel, with important contributions and
+suggestions being made by numerous people, including Ruediger Pakmor, Oliver
+Zier, and Martin Reinecke.
+
+https://wwwmpa.mpa-garching.mpg.de/gadget4/
+
+## Dependencies
+
+```bash
+spack install hdf5@1.14.1 gsl@2.1 fftw@3+mpi
+spack load hdf5@1.14.1 gsl@2.1 fftw@3+mpi
+scspkg create gadget4
+cd $(scspkg pkg src gadget4)
+git clone https://gitlab.mpcdf.mpg.de/vrs/gadget4
+cd gadget4
+mkdir build
+cd build
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/lammps.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/lammps.md
new file mode 100644
index 00000000..b228e2ea
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/lammps.md
@@ -0,0 +1,122 @@
+# LAMMPS
+## Installation
+## On Ares
+When you are on Ares, you should find the lammps application already installed as a module. To see if there is a lammps module on Ares, run the command below:
+
+```linux
+module spider lammps
+#module -r spider '.*lammps*.'
+```
+To load the module, use the following (note that this loads the default lammps version):
+```linux
+module load lammps
+```
+## Using spack
+You can check whether lammps is among the packages provided by spack using:
+```linux
+spack list lammps
+```
+To list the details of lammps (version, description, dependencies, etc.):
+```linux
+spack info lammps
+```
+To install lammps with the adios2 dependency using spack:
+```linux
+spack install lammps+adios
+```
+## Building from source
+You can download the source and the documentation as a tarball:
+```linux
+mkdir Build_lammps
+cd Build_lammps
+wget -c https://download.lammps.org/tars/lammps-stable.tar.gz
+tar -xzvf lammps*.tar.gz
+```
+Now change to the LAMMPS distribution directory that was just uncompressed:
+```linux
+cd /path/to/lammps
+mkdir build; cd build # create and use a build directory
+```
+
+### Building LAMMPS with ADIOS2
+#### Using CMake
+We are going to install LAMMPS with ADIOS2 using CMake. Note that LAMMPS requires CMake version 3.10 or later. You can check your cmake version using:
+```linux
+cmake --version
+```
+In order to build lammps with adios2, the adios2 library needs to be on your system or be downloaded and built. You then need to tell LAMMPS where it is found on your system:
+```linux
+# This can be done by providing the adios2 installation directory in the PATH variable
+export PATH=/path/of/adios2:$PATH
+# This is the ADIOS2 install path on my system: ~/spack/opt/spack/linux-ubuntu22.04-skylake/gcc-11.4.0/adios2-2.9.0-fsrkmnhkjp4ozmq5fsy2ryeaqcuuzhu
+```
+One important thing to note here is that if you have ADIOS2 built either with or without MPI, then the same must be matched when building LAMMPS:
+```linux
+#if ADIOS2 was built with MPI
+cmake ../cmake -D PKG_ADIOS=yes -D BUILD_MPI=yes
+#if ADIOS2 was built without MPI then
+#cmake ../cmake -D PKG_ADIOS=yes -D BUILD_MPI=no
+```
+The build files will now have been written to your build folder.
+Compile them using:
+```linux
+make
+#make -j N
+```
+If the compilation is successful, you should see a library ```liblammps.a``` and the LAMMPS executable ```lmp``` inside the build folder.
+
+Now you can either use LAMMPS by running the executable from this directory:
+```linux
+./lmp
+```
+Or install the LAMMPS executable into your system with:
+```linux
+make install # copies compiled files into the installation location
+```
+## Usage
+To show the usage of lammps with adios2, we can use either the ```dump atom/adios``` or ```dump custom/adios``` command.
+These commands dump a snapshot of atom coordinates every N timesteps in the ADIOS-based "BP" file format, or via one of the other I/O solutions in adios to a stream that can be read online by another program.
+Here is the syntax for each:
+```linux
+dump ID group-ID atom/adios N file.bp
+dump ID group-ID custom/adios N file.bp args
+```
+Example usage:
+Save the file below as ```lj_fluid.in```:
+```linux
+# create a Lennard-Jones fluid
+units lj
+atom_style atomic
+lattice fcc 0.8442
+region box block 0 20 0 20 0 20
+create_box 1 box
+create_atoms 1 box
+mass 1 1.0
+velocity all create 1.44 87287 loop geom
+pair_style lj/cut 2.5
+pair_coeff 1 1 1.0 1.0 2.5
+neighbor 0.3 bin
+neigh_modify delay 0 every 20 check no
+
+# define a group of atoms
+group fluid type 1
+
+# dump atom coordinates every 100 timesteps to atoms.bp
+dump adios1 fluid atom/adios 100 atoms.bp
+
+# dump custom quantities every 50 timesteps to custom.bp
+dump adios2 fluid custom/adios 50 custom.bp id type x y z vx vy vz
+
+# run the simulation for 1000 timesteps
+run 1000
+```
+Then run lammps, giving this input file as an argument:
+```linux
+lmp -in lj_fluid.in
+```
+If this is successful, you should see the files below in the current directory:
+```
+custom.bp
+atoms.bp
+```
+Note that a file ```adios2_config.xml``` with specific configuration settings is expected in the current working directory. If the file is not present, LAMMPS will create a minimal default file.
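+
+If you want to control the I/O engine yourself, a minimal ``adios2_config.xml``
+might look like the sketch below. The ``io`` names here are assumptions (they
+must match whatever the LAMMPS ADIOS package looks up), so prefer the default
+file LAMMPS generates as a starting point:
+```xml
+<?xml version="1.0"?>
+<adios-config>
+  <!-- sketch only: one io block per dump style; engine could be BP4 or BP5 -->
+  <io name="atom">
+    <engine type="BP4"/>
+  </io>
+  <io name="custom">
+    <engine type="BP4"/>
+  </io>
+</adios-config>
+```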
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/nyx.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/nyx.md
new file mode 100644
index 00000000..b42f640f
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/nyx.md
@@ -0,0 +1,121 @@
+# Nyx
+
+https://amrex-astro.github.io/Nyx/
+
+The Nyx code solves equations of compressible hydrodynamics on an adaptive grid hierarchy coupled with an N-body treatment of dark matter. The gas dynamics in Nyx uses a finite volume methodology on a set of 3-D Eulerian grids; dark matter is represented as discrete particles moving under the influence of gravity. Particles are evolved via a particle-mesh method, using a Cloud-in-Cell deposition/interpolation scheme. Both baryonic and dark matter contribute to the gravitational field. In addition, Nyx includes physics needed to accurately model the intergalactic medium: in the optically thin limit and assuming ionization equilibrium, the code calculates heating and cooling processes of the primordial-composition gas in an ionizing ultraviolet background radiation field. Additional physics capabilities are under development.
+
+## Installation
+
+1. Dependencies: MPI and parallel HDF5
+
+2. Install AMReX:
+
+```
+git clone https://github.com/AMReX-Codes/amrex.git
+mkdir amrex/build && cd amrex/build
+cmake .. -DAMReX_HDF5=ON -DAMReX_PARTICLES=ON -DAMReX_PIC=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=$HOME/amrex/install
+make -j 8
+make install
+```
+
+3. Install Nyx:
+
+```
+git clone https://github.com/AMReX-astro/Nyx.git
+cd Nyx
+mkdir build && cd build
+cmake .. -DCMAKE_PREFIX_PATH=$HOME/amrex/install/ -DAMReX_DIR=$HOME/amrex/install/Tools/CMake/ -DNyx_SINGLE_PRECISION_PARTICLES=OFF -DNyx_OMP=OFF
+make -j 8
+```
+
+## Usage
+
+The Nyx executable reads run-time information from an "inputs" file which you designate on the command line. Most executable directories have an "inputs" file. Nyx has several different executables; here I only show how to run the **LyA** example.
+
+### Configure the "inputs" file under the **LyA** directory
+
+1. By default, Nyx doesn't output HDF5 files. To enable Nyx to write HDF5 files, add `nyx.write_hdf5 = 1` to the "inputs" file. For example:
+
+```
+# ------------------ INPUTS TO MAIN PROGRAM -------------------
+max_step = 200
+
+nyx.ppm_type = 1
+nyx.use_colglaz = 0
+nyx.corner_coupling = 1
+nyx.write_hdf5 = 1 # add this line
+nyx.strang_split = 0
+nyx.sdc_split = 1
+nyx.add_ext_src = 0
+nyx.heat_cool_type = 11
+```
+
+2. Set `nyx.initial_z` and `nyx.final_z`. z corresponds to time but runs backwards: `nyx.initial_z` is larger than `nyx.final_z`. For example:
+
+```
+nyx.initial_z = 190.0
+nyx.final_z = 170.0
+```
+
+3. Nyx needs to initialize the particles; here I want it to read them from a binary file. By default, there is a binary file `64sssss_20mpc.nyx` in the LyA directory. Here are the related parameters:
+
+```
+# >>>>>>>>>>>>> PARTICLE INIT OPTIONS <<<<<<<<<<<<<<<<
+nyx.particle_init_type = BinaryFile
+nyx.binary_particle_file = 64sssss_20mpc.nyx
+particles.nparts_per_read = 2097152
+```
+
+Note: If `64sssss_20mpc.nyx` is located in a different directory, you need to use the absolute path of the file.
+
+4. Nyx will generate two kinds of files: "plotfiles" and "checkpoint" files. The "plotfiles" are used for visualization and the "checkpoint" files for restarting the code. The "plotfiles" are written in the AMReX plotfile binary format by default; you can tell Nyx to output HDF5 files by adding `nyx.write_hdf5 = 1`.
+
+4.1 Set "plotfiles" related parameters:
+
+```
+# PLOTFILES
+amr.plot_files_output = 1
+amr.plot_file = /mnt/ssd/jye20/LyA_test/plt
+amr.plot_int = -1
+nyx.plot_z_values = 188.0 184.0 182.0
+
+amr.plot_vars = density xmom ymom zmom rho_e Temp phi_grav
+#amr.derive_plot_vars = particle_mass_density particle_count
+```
+
+amr.plot_files_output: This is set to 1 to enable plot files. If you don't want to output plot files, set it to 0.
+
+amr.plot_file: This is the base name for the plotfile, e.g. plt. If you set it to `/mnt/ssd/jye20/LyA_test/plt`, then `/mnt/ssd/jye20/LyA_test` is the output directory.
+
+nyx.plot_z_values: Specify a list of z values for which Nyx will save a snapshot.
+
+amr.plot_vars: Specify the names of state variables to include in plotfiles.
+
+amr.derive_plot_vars: Specify the names of derived variables to include in plotfiles.
+
+4.2 Set checkpoint file parameters:
+
+```
+# CHECKPOINT FILES
+amr.checkpoint_files_output = 1
+amr.check_file = /mnt/ssd/jye20/LyA_test/chk
+amr.check_int = 100
+amr.checkpoint_nfiles = 64
+```
+
+5. Resolution-related parameters:
+
+```
+amr.n_cell = 64 64 64
+amr.max_grid_size = 32
+```
+
+Reference: [Nyx document](https://amrex-astro.github.io/Nyx/docs_html/NyxInputs.html#examples-of-usage-2)
+
+## Run the Nyx Application
+
+Enter the **LyA** executable directory and run:
+
+```
+./nyx_LyA ./inputs
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/openfoam.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/openfoam.md
new file mode 100644
index 00000000..194dc318
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/openfoam.md
@@ -0,0 +1,68 @@
+# OpenFOAM
+## Installation
+## On Ares
+When you are on Ares, you should find the openfoam application already installed as a module. To see if there is an openfoam module on Ares, run the command below:
+
+```linux
+module spider openfoam
+#module -r spider '.*openfoam*.'
+```
+To load the module, use the following (note that this loads the default openfoam version):
+```linux
+module load openfoam
+```
+## Using spack
+You can check whether openfoam is among the packages provided by spack using:
+```linux
+spack list openfoam
+```
+To list the details of openfoam (version, description, dependencies, etc.):
+```linux
+spack info openfoam
+```
+To install openfoam with the adios2 dependency using spack:
+```linux
+spack install openfoam^adios2
+```
+## Building from source
+You can download the source and the documentation as a tarball:
+```linux
+mkdir OpenFOAM
+cd OpenFOAM
+wget https://dl.openfoam.com/source/v2306/OpenFOAM-v2306.tgz
+tar xzvf OpenFOAM-v2306.tgz
+```
+Now change to the OpenFOAM distribution directory that was just uncompressed:
+```linux
+cd OpenFOAM-v2306/
+```
+
+Prior to building, ensure that the system requirements are satisfied and source the correct OpenFOAM environment. For example, for the OpenFOAM-v2306 version:
+```linux
+source ~/OpenFOAM/OpenFOAM-v2306/etc/bashrc
+```
+These are the minimum system requirements for installing OpenFOAM:
+```
+gcc : 7.5.0
+cmake: 3.8
+```
+You can check your versions using:
+```linux
+gcc --version
+cmake --version
+```
+To test system readiness, use:
+```linux
+foamSystemCheck
+```
+If your system is ready, you should get the following:
+```
+System check: PASS
+==================
+Can continue to OpenFOAM installation.
+```
+You should be able to see the ```Allwmake``` file in the OpenFOAM directory. To compile OpenFOAM, run:
+```linux
+./Allwmake -j -s -q -l
+#This compiles with all cores (-j), reduced output (-s, -silent), with queuing (-q, -queue), and logs (-l, -log) the output to a file
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic-kokkos.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic-kokkos.md
new file mode 100644
index 00000000..62aa39f3
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic-kokkos.md
@@ -0,0 +1,47 @@
+# VPIC Kokkos
+
+## Installation
+
+```bash
+scspkg create vpic_kokkos
+scspkg env prepend vpic_kokkos CPATH ${PWD}/include/kokkos
+cd $(scspkg pkg root vpic_kokkos)/..
+rm -rf vpic_kokkos
+git clone --recursive git@github.com:lanl/vpic-kokkos.git vpic_kokkos
+cd $(scspkg pkg root vpic_kokkos)
+git submodule update --init --recursive
+mkdir build
+cd build
+cmake ../ \
+-DKokkos_ENABLE_OPENMP=ON \
+-DENABLE_KOKKOS_CUDA=OFF \
+-DBUILD_INTERNAL_KOKKOS=ON \
+-DCMAKE_CXX_FLAGS="-g -O2 -rdynamic" \
+-DCMAKE_INSTALL_PREFIX=$(scspkg pkg root vpic_kokkos)
+make -j8
+make install
+module load vpic_kokkos
+```
+
+## The generic example
+
+Compile:
+```
+vpic $(scspkg pkg root vpic_kokkos)/sample/generic
+vpic $(scspkg pkg root vpic_kokkos)/test/integrated/energy_comparison/weibel.deck
+vpic $(scspkg pkg root vpic_kokkos)/test/integrated/to_completion/dump.deck
+```
+
+Run:
+```
+mkdir mytests
+cd mytests
+
+mpirun -n 2 ../generic.Linux
+mpirun -n 2 ../weibel.deck.Linux
+mpirun -n 2 ../dump.deck.Linux
+cd ../
+```
+
+NOTE: many of the sample decks don't work. The unit tests work, but they aren't
+really physically accurate; they are just unit tests.
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic.md
new file mode 100644
index 00000000..3ec38833
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/vpic.md
@@ -0,0 +1,35 @@
+# VPIC
+
+## Installation
+
+```bash
+scspkg create vpic
+cd $(scspkg pkg root vpic)/..
+rm -rf vpic
+git clone --recursive https://github.com/lanl/vpic.git vpic
+cd vpic
+mkdir build
+cd build
+cmake ../ \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DENABLE_INTEGRATED_TESTS=ON \
+    -DENABLE_UNIT_TESTS=ON \
+    -DCMAKE_C_FLAGS="-rdynamic" \
+    -DCMAKE_CXX_FLAGS="-rdynamic" \
+    -DCMAKE_INSTALL_PREFIX=$(scspkg pkg root vpic)
+make -j8
+make install
+module load vpic
+```
+
+## The generic example
+
+Compile:
+```
+vpic $(scspkg pkg root vpic)/sample/harris
+```
+
+Run:
+```
+./harris.Linux
+```
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/wrf.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/wrf.md
new file mode 100644
index 00000000..31690f60
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/12-packages/wrf.md
@@ -0,0 +1,473 @@
+# WRF
+
+## Installation
+
+### System Check
+
+Create a test folder:
+
+```bash
+mkdir TESTS
+cd TESTS
+```
+
+Download the test cases and start the tests; the output of each test should include SUCCESS:
+
+```bash
+wget https://www2.mmm.ucar.edu/wrf/OnLineTutorial/compile_tutorial/tar_files/Fortran_C_tests.tar
+tar -xf Fortran_C_tests.tar
+gfortran TEST_1_fortran_only_fixed.f
+./a.out
+gfortran TEST_2_fortran_only_free.f90
+./a.out
+gcc TEST_3_c_only.c
+./a.out
+gcc -c -m64 TEST_4_fortran+c_c.c
+gfortran -c -m64 TEST_4_fortran+c_f.f90
+gfortran -m64 TEST_4_fortran+c_f.o TEST_4_fortran+c_c.o
+./a.out
+./TEST_csh.csh
+./TEST_perl.pl
+./TEST_sh.sh
+```
+
+### Create a library folder and install the libraries
+
+```bash
+mkdir Build_WRF
+cd Build_WRF
+mkdir LIBRARIES
+cd LIBRARIES
+```
+
+Download and unzip the required libraries.
+
+1. netcdf-c-4.9.2
+
+```bash
+wget -c https://downloads.unidata.ucar.edu/netcdf-c/4.9.2/netcdf-c-4.9.2.tar.gz
+tar xzvf netcdf-c-4.9.2.tar.gz
+```
+
+2. hdf5 (you can also install the newest version)
+
+```bash
+wget -c https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.12/hdf5-1.12.2/src/hdf5-1.12.2.tar.gz
+tar xzvf hdf5-1.12.2.tar.gz
+```
+
+3. zlib (1.2.11)
+
+```bash
+wget -c https://www2.mmm.ucar.edu/wrf/OnLineTutorial/compile_tutorial/tar_files/zlib-1.2.11.tar.gz
+tar xzvf zlib-1.2.11.tar.gz
+```
+
+4. NETCDF-Fortran (4.6.1)
+
+```bash
+wget https://downloads.unidata.ucar.edu/netcdf-fortran/4.6.1/netcdf-fortran-4.6.1.tar.gz
+tar xzvf netcdf-fortran-4.6.1.tar.gz
+```
+
+5. PnetCDF (1.12.3)
+
+```bash
+wget https://parallel-netcdf.github.io/Release/pnetcdf-1.12.3.tar.gz
+tar xzvf pnetcdf-1.12.3.tar.gz
+```
+
+6. LibPNG
+
+```bash
+wget -c https://www2.mmm.ucar.edu/wrf/OnLineTutorial/compile_tutorial/tar_files/libpng-1.2.50.tar.gz
+tar xzvf libpng-1.2.50.tar.gz
+```
+
+7. MPICH: when you load adios2, MPI is automatically added to PATH
+
+```bash
+module load adios2/2.9.1-6fh7kh2
+```
+
+### Choice 1: Install the libraries (with HDF5 support and without PnetCDF support)
+
+Set the environment variables:
+
+```bash
+export DIR=/PATH/TO/Build_WRF/LIBRARIES
+export CC=gcc
+export CXX=g++
+export FC=gfortran
+export F77=gfortran
+module load libxml2
+```
+
+1. Install zlib
+
+```bash
+cd zlib-1.2.11/
+./configure --prefix=$DIR
+make
+make install
+```
+
+2. Install HDF5
+
+```bash
+cd hdf5-1.12.2/
+./configure --prefix=$DIR --with-zlib=$DIR --enable-hl --enable-fortran
+make check
+make install
+export HDF5=$DIR
+export LD_LIBRARY_PATH=$DIR/lib:$LD_LIBRARY_PATH
+```
+
+3. Build NETCDF-C
+
+```bash
+cd netcdf-c-4.9.2
+export CPPFLAGS=-I$DIR/include
+export LDFLAGS=-L$DIR/lib
+./configure --prefix=$DIR --disable-dap
+make check
+make install
+export PATH=$DIR/bin:$PATH
+export NETCDF=$DIR
+```
+
+4. Build the NetCDF Fortran library and link it against NETCDF-C. After a successful
+installation, netcdf.inc will be in `${NETCDF}/include`.
+Sometimes configure reports that the C compiler stopped working; if so, check the
+environment variable LIBS="-lnetcdf -lhdf5_hl -lhdf5 -lz" and make sure the netcdf
+and hdf5 paths are in the environment variables.
+
+```bash
+export LD_LIBRARY_PATH=$DIR/lib:$LD_LIBRARY_PATH
+export CPPFLAGS="-I${DIR}/include -I/usr/include"
+export LDFLAGS="-L${DIR}/lib -L/usr/lib"
+export LIBS="-lnetcdf -lhdf5_hl -lhdf5 -lz"
+./configure --prefix=$DIR --disable-shared
+make check
+make install
+```
+
+Library compatibility tests (the result should show SUCCESS):
+
+```bash
+mkdir LibTests
+cd LibTests
+wget https://www2.mmm.ucar.edu/wrf/OnLineTutorial/compile_tutorial/tar_files/Fortran_C_NETCDF_MPI_tests.tar
+tar -xf Fortran_C_NETCDF_MPI_tests.tar
+cp ${NETCDF}/include/netcdf.inc .
+gfortran -c 01_fortran+c+netcdf_f.f
+gcc -c 01_fortran+c+netcdf_c.c
+gfortran 01_fortran+c+netcdf_f.o 01_fortran+c+netcdf_c.o \
+    -L${NETCDF}/lib -lnetcdff -lnetcdf
+./a.out
+```
+
+5. Install WRF.
+Note that adios2 needs to be installed with c-blosc, MPI, and HDF5 support.
+
+```bash
+cd ..
+wget https://github.com/wrf-model/WRF/releases/download/v4.5.1/v4.5.1.tar.gz
+tar xvzf v4.5.1.tar.gz
+cd WRFV4.5.1
+module load adios2/2.9.1-6fh7kh2
+export ADIOS2="/mnt/repo/software/spack/spack/opt/spack/linux-ubuntu22.04-skylake_avx512/gcc-11.3.0/adios2-2.9.1-6fh7kh2v3tombadg56kmop72sjhf23dg"
+export HDF5=$DIR
+export NETCDF=$DIR
+export NETCDF_classic=1
+./configure
+```
+
+When asked to select from among the Linux x86_64 options, choose 34 (dmpar).
+For "Compile for nesting?", choose 1.
+Once configure is done, you should see a configure.wrf file in the folder.
+
+```bash
+./compile em_real
+#./compile em_real >& log.compile
+export WRF_DIR=/PATH/to/WRFV4.5.1
+```
+
+The following output shows that the installation was successful.
+The following output indicates that the installation was successful:
+
+```bash
+---> Executables successfully built <---
+ -rwxrwxr-x 1 hxu40 hxu40 51667784 Sep 15 12:21 main/ndown.exe
+ -rwxrwxr-x 1 hxu40 hxu40 51794800 Sep 15 12:21 main/real.exe
+ -rwxrwxr-x 1 hxu40 hxu40 50930720 Sep 15 12:21 main/tc.exe
+ -rwxrwxr-x 1 hxu40 hxu40 59885752 Sep 15 12:21 main/wrf.exe
+
+==========================================================================
+```
+
+To check whether the build was successful:
+
+```bash
+ls -ls main/*.exe
+```
+
+If you compiled a real case, you should see:
+
+```bash
+wrf.exe (model executable)
+real.exe (real data initialization)
+ndown.exe (one-way nesting)
+tc.exe (for tc bogusing--serial only)
+```
+
+### Choice 2: Install the libraries (with both HDF5 and PnetCDF support)
+
+TBD; this does not appear to be necessary for this case.
+
+### Install WPS
+
+Download and install the required libraries:
+
+```bash
+cd $DIR
+wget https://www2.mmm.ucar.edu/wrf/OnLineTutorial/compile_tutorial/tar_files/libpng-1.2.50.tar.gz
+tar xzvf libpng-1.2.50.tar.gz
+cd libpng-1.2.50
+./configure --prefix=$DIR/grib2
+make
+make install
+cd ..
+wget https://www2.mmm.ucar.edu/wrf/OnLineTutorial/compile_tutorial/tar_files/jasper-1.900.1.tar.gz
+tar xzvf jasper-1.900.1.tar.gz
+cd jasper-1.900.1
+./configure --prefix=$DIR/grib2
+make
+make install
+cd ..
+cd $DIR
+cd zlib-1.2.11
+./configure --prefix=$DIR/grib2
+make
+make install
+export JASPERLIB=$DIR/grib2/lib
+export JASPERINC=$DIR/grib2/include
+```
+
+Download and install WPS:
+
+```bash
+# go to the WRF folder
+mkdir WPS
+wget https://github.com/wrf-model/WPS/archive/v4.5.tar.gz
+tar -xvzf v4.5.tar.gz
+cd WPS-4.5
+export JASPERLIB=$DIR/grib2/lib
+export JASPERINC=$DIR/grib2/include
+./configure --build-grib2-libs # Option 3 for gfortran and distributed memory
+./compile
+```
+
+If you see this message after ./configure, you can ignore it:
+
+```
+Testing for NetCDF, C and Fortran compiler
+This installation NetCDF is 64-bit
+C compiler is 64-bit
+Fortran compiler is 64-bit
+Your versions of Fortran and NETCDF are not consistent.
+```
+
+If the compilation succeeds, there should be 3 executables in the WPS top-level directory,
+linked to their corresponding src/ directories:
+
+```bash
+geogrid.exe -> geogrid/src/geogrid.exe
+ungrib.exe -> ungrib/src/ungrib.exe
+metgrid.exe -> metgrid/src/metgrid.exe
+```
+
+## Usage
+
+1. Download a benchmark dataset; here is the website for the datasets:
+   https://www2.mmm.ucar.edu/wrf/users/benchmark/v44/benchdata_v44.html
+   Choose one of them for testing and unzip it.
+   Copy the wrfbdy_d01, wrfinput_d01, and namelist.input files to WRF/test/em_real
+   (see the sketch below).
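+
+A minimal sketch of that copy step (the benchmark path below is illustrative, not from the tutorial; $WRF_DIR was exported after the WRF build):
+
+```bash
+# Copy the benchmark inputs into the real-data test case directory
+cp /path/to/benchmark/wrfbdy_d01 \
+   /path/to/benchmark/wrfinput_d01 \
+   /path/to/benchmark/namelist.input \
+   $WRF_DIR/test/em_real/
+```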
+
+Edit the namelist.input file. The ADIOS2 I/O option for the history and/or restart file is
+enabled by setting one or both of the following:
+
+```
+io_form_history = 14
+io_form_restart = 14
+frames_per_outfile = 1000000,
+history_outname = '/Path/to/output/wrfout_d_'
+```
+
+The history_outname path can also be changed in the WRF/Register/register.io_bipartitie
+file; do not change the wrfout_d_ part.
+
+To run WRF with ADIOS2, add an adios2.xml file in the same folder (wrf/test/em_real). The
+original example adios2.xml was lost from this page; the following is a minimal sketch of
+an ADIOS2 XML configuration (the io name and engine parameters are assumptions, not taken
+from the tutorial):
+
+```xml
+<?xml version="1.0"?>
+<adios-config>
+  <!-- Assumed io name; it must match the IO object WRF opens for output -->
+  <io name="wrfout">
+    <engine type="BP5">
+      <!-- Example tuning parameter; adjust or remove as needed -->
+      <parameter key="NumAggregators" value="1"/>
+    </engine>
+  </io>
+</adios-config>
+```
+
+```bash
+module load adios2/2.9.1-6fh7kh2
+export DIR=~/Build_WRF/LIBRARIES
+export CC=gcc
+export CXX=g++
+export FC=gfortran
+export F77=gfortran
+export HDF5=$DIR
+export LD_LIBRARY_PATH=$DIR/lib:$LD_LIBRARY_PATH
+export NETCDF=$DIR
+export PATH=$DIR/bin:$PATH
+export LD_LIBRARY_PATH=/mnt/repo/software/spack/spack/opt/spack/linux-ubuntu22.04-skylake_avx512/gcc-11.3.0/adios2-2.9.1-6fh7kh2v3tombadg56kmop72sjhf23dg/lib:$LD_LIBRARY_PATH
+./wrf.exe   # or: mpirun -np <num_procs> ./wrf.exe
+```
+
+The BP5 output will contain an mmd.0 file; BP4 output will not. The output file has the
+same name on different machines: "wrfout_d01_2019-11-26_12:00:00".
+
+## Post Processing
+
+Here is the Python post-processing script. Add the same adios2.xml file to the folder from
+which you run the script.
+
+```python
+import argparse
+import adios2  # pylint: disable=import-error
+import numpy as np  # pylint: disable=import-error
+import matplotlib.pyplot as plt  # pylint: disable=import-error
+import matplotlib.gridspec as gridspec  # pylint: disable=import-error
+from mpi4py import MPI  # pylint: disable=import-error
+import cartopy.crs as ccrs  # pylint: disable=import-error
+import cartopy.feature as cfeature  # pylint: disable=import-error
+from mpl_toolkits.axes_grid1 import make_axes_locatable  # pylint: disable=import-error
+
+
+def setup_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--instream", "-i", help="Name of the input stream", default="wrfout_d01_2019-11-26_23:00:00")
+    parser.add_argument("--outfile", "-o", help="Name of the output file", default="screen")
+    parser.add_argument("--varname", "-v", help="Name of variable read", default="T2")
+    args = parser.parse_args()
+    return args
+
+
+def plot_var(var, fr_step):
+    lccproj = ccrs.LambertConformal(central_longitude=-74.5, central_latitude=38.8)
+    fig, ax = plt.subplots(figsize=(15, 18), subplot_kw=dict(projection=lccproj))
+    plt.subplots_adjust(right=0.88)  # adjust the right margin of the plot
+    title = fr_step.read_string("Times")
+    plt.title("WRF-ADIOS2 Demo \n {}".format(title[0]), fontsize=17)
+
+    # format the spacing of the colorbar
+    divider = make_axes_locatable(ax)
+    cax = divider.new_horizontal(size='5%', pad=0.1, axes_class=plt.Axes)
+    fig.add_axes(cax)
+
+    displaysec = 0.5
+    cur_step = fr_step.current_step()
+    x = fr_step.read("XLONG")
+    y = fr_step.read("XLAT")
+    data = fr_step.read(var)
+    print(data)
+    data = data * 9 / 5 - 459.67  # convert from K to F
+
+    # define the limits for the model to subset and plot
+    # model_lims = dict(minlon=-80, maxlon=-69, minlat=35, maxlat=43)
+
+    # # create boolean indices where lat/lon are within defined boundaries
+    # lon_ind = np.logical_and(x > model_lims['minlon'], x < model_lims['maxlon'])
+    # lat_ind = np.logical_and(y > model_lims['minlat'], y < model_lims['maxlat'])
+    # # find i and j indices of lon/lat in boundaries
+    # ind = np.where(np.logical_and(lon_ind, lat_ind))
+
+    # data = np.squeeze(data)[range(np.min(ind[0]), np.max(ind[0]) + 1),
+    #                         range(np.min(ind[1]), np.max(ind[1]) + 1)]
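+    # Note (added; not in the original script): the commented-out block above
+    # optionally crops the model grid to a lat/lon bounding box before plotting.
+    # If you enable it, subset x and y with the same index ranges as data so
+    # pcolormesh still receives matching shapes, e.g. (a sketch):
+    #   ii = slice(np.min(ind[0]), np.max(ind[0]) + 1)
+    #   jj = slice(np.min(ind[1]), np.max(ind[1]) + 1)
+    #   x, y, data = x[ii, jj], y[ii, jj], np.squeeze(data)[ii, jj]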
+    h = ax.pcolormesh(x, y, data, vmin=-20, vmax=110,
+                      cmap='jet', transform=ccrs.PlateCarree())
+
+    cb = plt.colorbar(h, cax=cax)
+    cb.set_label(label="Temperature [F]", fontsize=14)  # add the label on the colorbar
+    cb.ax.tick_params(labelsize=12)  # format the size of the tick labels
+
+    # add contours
+    contour_list = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]  # define contour levels
+    cs = ax.contour(x, y, data, contour_list, colors='black',
+                    linewidths=.5, transform=ccrs.PlateCarree())
+    ax.clabel(cs, inline=True, fontsize=10.5, fmt='%d')
+
+    # add the latitude and longitude gridlines
+    gl = ax.gridlines(draw_labels=True, linewidth=1, color='gray', alpha=0.5,
+                      linestyle='dotted', x_inline=False)
+    gl.top_labels = False
+    gl.right_labels = False
+    gl.xlabel_style = {'size': 13}
+    gl.ylabel_style = {'size': 13}
+
+    # add map features
+    land = cfeature.NaturalEarthFeature('physical', 'land', '10m')
+    ax.add_feature(land, zorder=5, edgecolor='black', facecolor='none')
+
+    state_lines = cfeature.NaturalEarthFeature(
+        category='cultural',
+        name='admin_1_states_provinces_lines',
+        scale='10m',
+        facecolor='none')
+
+    ax.add_feature(cfeature.BORDERS, zorder=6)
+    ax.add_feature(state_lines, zorder=7, edgecolor='black')
+
+    # plt.title(title)
+    # plt.ion()
+    # plt.show()
+    # plt.pause(displaysec)
+    # clear_output()
+    # plt.clf()
+
+    imgfile = "image" + "{0:0>5}.png".format(cur_step)
+    plt.savefig(imgfile)
+    plt.clf()
+
+
+if __name__ == "__main__":
+    args = setup_args()
+    fr = adios2.open(args.instream, "r", MPI.COMM_WORLD, "adios2.xml", "wrfout_d01_2019-11-26_23:00:00")
+
+    for fr_step in fr:
+        plot_var(args.varname, fr_step)
+
+    fr.close()
+```
+
+Run this Python script with the adios2.xml file in the same folder.
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/20-future-work.md b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/20-future-work.md
new file mode 100644
index 00000000..8083f51b
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/20-future-work.md
@@ -0,0 +1,8 @@
+# Future Work
+
+1. What if we aren't using SSH?
+2. How do we specify custom SSH keys? Passwords?
+3. Can we schedule Jarvis jobs automatically with Slurm?
+4. Can we build containers around pipelines automatically?
+5. Can we add dependencies to pipelines?
+
diff --git a/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/images/pipeline.svg b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/images/pipeline.svg
new file mode 100644
index 00000000..5ec1a3f4
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/02-jarvis-cd/images/pipeline.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/07-iowarp/03-platform-plugins-interface/_category_.yml b/docs/07-iowarp/03-platform-plugins-interface/_category_.yml
new file mode 100644
index 00000000..6a00e921
--- /dev/null
+++ b/docs/07-iowarp/03-platform-plugins-interface/_category_.yml
@@ -0,0 +1 @@
+label: "Jarvis"