From 6f950b0110a55af747399ed49f28c843cd6b4a68 Mon Sep 17 00:00:00 2001 From: JSKenyon Date: Tue, 26 Mar 2024 15:01:11 +0200 Subject: [PATCH 1/6] Checkpoint work so far. --- pyproject.toml | 1 + scabha/basetypes.py | 72 ++++++++++++++++++++++++++++++++++++--- stimela/backends/utils.py | 10 +++--- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7febb0e1..8bab874a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ pydantic = "^1.10.2" psutil = "^5.9.3" rich = "^13.7.0" dill = "^0.3.6" +typeguard = "^4.2.1" [tool.poetry.scripts] stimela = "stimela.main:cli" diff --git a/scabha/basetypes.py b/scabha/basetypes.py index fa8c438c..23eb16bc 100644 --- a/scabha/basetypes.py +++ b/scabha/basetypes.py @@ -1,9 +1,12 @@ from dataclasses import field, dataclass from collections import OrderedDict -from typing import List +from typing import List, Union, get_args, get_origin import os.path import re from .exceptions import UnsetError +from itertools import zip_longest +from typeguard import check_type, TypeCheckError + def EmptyDictDefault(): return field(default_factory=lambda:OrderedDict()) @@ -55,7 +58,7 @@ def __init__(self, value): def parse(value: str, expand_user=True): """ Parses URI. If URI does not start with "protocol://", assumes "file://" - + Returns tuple of (protocol, path, is_remote) If expand_user is True, ~ in (file-protocol) paths will be expanded. @@ -75,7 +78,7 @@ class File(URI): @property def NAME(self): return File(os.path.basename(self)) - + @property def PATH(self): return File(os.path.abspath(self)) @@ -95,7 +98,7 @@ def BASENAME(self): @property def EXT(self): return os.path.splitext(self)[1] - + @property def EXISTS(self): return os.path.exists(self) @@ -114,3 +117,64 @@ def is_file_type(dtype): def is_file_list_type(dtype): return any(dtype == List[t] for t in FILE_TYPES) + +class Skip(object): + def iterate_samples(self, collection): + return () + + +def get_filelikes(dtype, value, filelikes=None): + """Recursively recover all filelike elements from a composite dtype.""" + + filelikes = set() if filelikes is None else filelikes + + origin = get_origin(dtype) + args = get_args(dtype) + + if origin: # Implies composition. + + if origin is dict: + + # No further work required for empty collections. + if len(value) == 0: + return filelikes + + k_dtype, v_dtype = args + + for k, v in value.items(): + filelikes = get_filelikes(k_dtype, k, filelikes) + filelikes = get_filelikes(v_dtype, v, filelikes) + + elif origin in (tuple, list, set): + + # No further work required for empty collections. + if len(value) == 0: + return filelikes + + # This is a special case for tuples of arbitrary + # length i.e. list-like behaviour. + if ... in args: + args = tuple([a for a in args if a != ...]) + + for dt, v in zip_longest(args, value, fillvalue=args[0]): + filelikes = get_filelikes(dt, v, filelikes) + + elif origin is Union: + + for dt in args: + + try: + # Do not check collection member types. + check_type(value, dt, collection_check_strategy=Skip()) + except TypeCheckError: + continue + filelikes = get_filelikes(dt, value, filelikes) + + else: + raise ValueError(f"Failed to traverse {dtype} dtype when looking for files.") + + else: + if is_file_type(dtype): + filelikes.add(value) + + return filelikes diff --git a/stimela/backends/utils.py b/stimela/backends/utils.py index 2dbe5124..accf5aee 100644 --- a/stimela/backends/utils.py +++ b/stimela/backends/utils.py @@ -4,7 +4,7 @@ from stimela.kitchen.cab import Cab, Parameter from scabha.exceptions import SchemaError from stimela.exceptions import BackendError -from scabha.basetypes import File, Directory, MS, URI +from scabha.basetypes import File, Directory, MS, URI, get_filelikes ## commenting out for now -- will need to fix when we reactive the kube backend (and have tests for it) @@ -34,11 +34,9 @@ def add_target(param_name, path, must_exist, readwrite): if schema is None: raise SchemaError(f"parameter {name} not in defined inputs or outputs for this cab. This should have been caught by validation earlier!") - if schema.is_file_type: - files = [value] - elif schema.is_file_list_type: - files = value - else: + files = get_filelikes(schema._dtype, value) + + if not files: continue must_exist = schema.must_exist and name in inputs From 2c4a9b8998f941b8d2b221491c4a84f6d09f573a Mon Sep 17 00:00:00 2001 From: JSKenyon Date: Tue, 26 Mar 2024 16:37:20 +0200 Subject: [PATCH 2/6] Add tests and refine approach. --- scabha/basetypes.py | 36 ++++++++++++++++-------- tests/scabha_tests/test_filelikes.py | 42 ++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 tests/scabha_tests/test_filelikes.py diff --git a/scabha/basetypes.py b/scabha/basetypes.py index 23eb16bc..f541dd4b 100644 --- a/scabha/basetypes.py +++ b/scabha/basetypes.py @@ -1,11 +1,15 @@ +from __future__ import annotations from dataclasses import field, dataclass from collections import OrderedDict -from typing import List, Union, get_args, get_origin +from typing import List, Union, get_args, get_origin, Any import os.path import re from .exceptions import UnsetError from itertools import zip_longest -from typeguard import check_type, TypeCheckError +from typeguard import ( + check_type, TypeCheckError, TypeCheckerCallable, TypeCheckMemo, checker_lookup_functions +) +from inspect import isclass def EmptyDictDefault(): @@ -118,11 +122,21 @@ def is_file_list_type(dtype): return any(dtype == List[t] for t in FILE_TYPES) -class Skip(object): - def iterate_samples(self, collection): - return () +def check_filelike(value: Any, origin_type: Any, args: tuple[Any, ...], memo: TypeCheckMemo) -> None: + """Custom checker for filelike objects. Currently checks for strings.""" + if not isinstance(value, str): + raise TypeCheckError(f'{value} is not compatible with URI or its subclasses.') +def filelike_lookup(origin_type: Any, args: tuple[Any, ...], extras: tuple[Any, ...]) -> TypeCheckerCallable | None: + """Lookup the custom checker for filelike objects.""" + if isclass(origin_type) and issubclass(origin_type, URI): + return check_filelike + + return None + +checker_lookup_functions.append(filelike_lookup) # Register custom type checker. + def get_filelikes(dtype, value, filelikes=None): """Recursively recover all filelike elements from a composite dtype.""" @@ -152,9 +166,9 @@ def get_filelikes(dtype, value, filelikes=None): return filelikes # This is a special case for tuples of arbitrary - # length i.e. list-like behaviour. - if ... in args: - args = tuple([a for a in args if a != ...]) + # length i.e. list-like behaviour. We can simply + # strip out the Ellipsis. + args = tuple([arg for arg in args if arg != ...]) for dt, v in zip_longest(args, value, fillvalue=args[0]): filelikes = get_filelikes(dt, v, filelikes) @@ -162,11 +176,9 @@ def get_filelikes(dtype, value, filelikes=None): elif origin is Union: for dt in args: - try: - # Do not check collection member types. - check_type(value, dt, collection_check_strategy=Skip()) - except TypeCheckError: + check_type(value, dt) + except TypeCheckError: # Value doesn't match dtype - incorrect branch. continue filelikes = get_filelikes(dt, value, filelikes) diff --git a/tests/scabha_tests/test_filelikes.py b/tests/scabha_tests/test_filelikes.py new file mode 100644 index 00000000..b3697d92 --- /dev/null +++ b/tests/scabha_tests/test_filelikes.py @@ -0,0 +1,42 @@ +from scabha.basetypes import get_filelikes, File, URI, Directory, MS +from typing import Dict, List, Set, Tuple, Union, Optional +import pytest + + +@pytest.fixture(scope="module", params=[File, URI, Directory, MS]) +def templates(request): + + ft = request.param + + TEMPLATES = ( + (Tuple, (), set()), + (Tuple[int, ...], [1, 2], set()), + (Tuple[ft, ...], ("foo", "bar"), {"foo", "bar"}), + (Tuple[ft, str], ("foo", "bar"), {"foo"}), + (Dict[str, int], {"a": 1, "b": 2}, set()), + (Dict[str, ft], {"a": "foo", "b": "bar"}, {"foo", "bar"}), + (Dict[ft, str], {"foo": "a", "bar": "b"}, {"foo", "bar"}), + (List[ft], [], set()), + (List[int], [1, 2], set()), + (List[ft], ["foo", "bar"], {"foo", "bar"}), + (Set[ft], set(), set()), + (Set[int], {1, 2}, set()), + (Set[ft], {"foo", "bar"}, {"foo", "bar"}), + (Union[str, List[ft]], "foo", set()), + (Union[str, List[ft]], ["foo"], {"foo"}), + (Union[str, Tuple[ft]], "foo", set()), + (Union[str, Tuple[ft]], ("foo",), {"foo"}), + (Optional[ft], None, set()), + (Optional[ft], "foo", {"foo"}), + (Optional[Union[ft, int]], 1, set()), + (Optional[Union[ft, int]], "foo", {"foo"}), + (Dict[str, Tuple[ft, str]], {"a": ("foo", "bar")}, {"foo"}) + ) + + return TEMPLATES + + +def test_get_filelikes(templates): + + for dt, v, res in templates: + assert get_filelikes(dt, v) == res, f"Failed for dtype {dt} and value {v}." From afe569efffd67c6431efc0825ba170a61f6f5ffb Mon Sep 17 00:00:00 2001 From: JSKenyon Date: Tue, 26 Mar 2024 15:01:11 +0200 Subject: [PATCH 3/6] Checkpoint work so far. --- pyproject.toml | 1 + scabha/basetypes.py | 72 ++++++++++++++++++++++++++++++++++++--- stimela/backends/utils.py | 10 +++--- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 306a7a52..2da49782 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ pydantic = "^1.10.2" psutil = "^5.9.3" rich = "^13.7.0" dill = "^0.3.6" +typeguard = "^4.2.1" [tool.poetry.scripts] stimela = "stimela.main:cli" diff --git a/scabha/basetypes.py b/scabha/basetypes.py index fa8c438c..23eb16bc 100644 --- a/scabha/basetypes.py +++ b/scabha/basetypes.py @@ -1,9 +1,12 @@ from dataclasses import field, dataclass from collections import OrderedDict -from typing import List +from typing import List, Union, get_args, get_origin import os.path import re from .exceptions import UnsetError +from itertools import zip_longest +from typeguard import check_type, TypeCheckError + def EmptyDictDefault(): return field(default_factory=lambda:OrderedDict()) @@ -55,7 +58,7 @@ def __init__(self, value): def parse(value: str, expand_user=True): """ Parses URI. If URI does not start with "protocol://", assumes "file://" - + Returns tuple of (protocol, path, is_remote) If expand_user is True, ~ in (file-protocol) paths will be expanded. @@ -75,7 +78,7 @@ class File(URI): @property def NAME(self): return File(os.path.basename(self)) - + @property def PATH(self): return File(os.path.abspath(self)) @@ -95,7 +98,7 @@ def BASENAME(self): @property def EXT(self): return os.path.splitext(self)[1] - + @property def EXISTS(self): return os.path.exists(self) @@ -114,3 +117,64 @@ def is_file_type(dtype): def is_file_list_type(dtype): return any(dtype == List[t] for t in FILE_TYPES) + +class Skip(object): + def iterate_samples(self, collection): + return () + + +def get_filelikes(dtype, value, filelikes=None): + """Recursively recover all filelike elements from a composite dtype.""" + + filelikes = set() if filelikes is None else filelikes + + origin = get_origin(dtype) + args = get_args(dtype) + + if origin: # Implies composition. + + if origin is dict: + + # No further work required for empty collections. + if len(value) == 0: + return filelikes + + k_dtype, v_dtype = args + + for k, v in value.items(): + filelikes = get_filelikes(k_dtype, k, filelikes) + filelikes = get_filelikes(v_dtype, v, filelikes) + + elif origin in (tuple, list, set): + + # No further work required for empty collections. + if len(value) == 0: + return filelikes + + # This is a special case for tuples of arbitrary + # length i.e. list-like behaviour. + if ... in args: + args = tuple([a for a in args if a != ...]) + + for dt, v in zip_longest(args, value, fillvalue=args[0]): + filelikes = get_filelikes(dt, v, filelikes) + + elif origin is Union: + + for dt in args: + + try: + # Do not check collection member types. + check_type(value, dt, collection_check_strategy=Skip()) + except TypeCheckError: + continue + filelikes = get_filelikes(dt, value, filelikes) + + else: + raise ValueError(f"Failed to traverse {dtype} dtype when looking for files.") + + else: + if is_file_type(dtype): + filelikes.add(value) + + return filelikes diff --git a/stimela/backends/utils.py b/stimela/backends/utils.py index 2dbe5124..accf5aee 100644 --- a/stimela/backends/utils.py +++ b/stimela/backends/utils.py @@ -4,7 +4,7 @@ from stimela.kitchen.cab import Cab, Parameter from scabha.exceptions import SchemaError from stimela.exceptions import BackendError -from scabha.basetypes import File, Directory, MS, URI +from scabha.basetypes import File, Directory, MS, URI, get_filelikes ## commenting out for now -- will need to fix when we reactive the kube backend (and have tests for it) @@ -34,11 +34,9 @@ def add_target(param_name, path, must_exist, readwrite): if schema is None: raise SchemaError(f"parameter {name} not in defined inputs or outputs for this cab. This should have been caught by validation earlier!") - if schema.is_file_type: - files = [value] - elif schema.is_file_list_type: - files = value - else: + files = get_filelikes(schema._dtype, value) + + if not files: continue must_exist = schema.must_exist and name in inputs From 9e574eac8e53f90b13914774f8eeda5f925f2ce9 Mon Sep 17 00:00:00 2001 From: JSKenyon Date: Tue, 26 Mar 2024 16:37:20 +0200 Subject: [PATCH 4/6] Add tests and refine approach. --- scabha/basetypes.py | 36 ++++++++++++++++-------- tests/scabha_tests/test_filelikes.py | 42 ++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 tests/scabha_tests/test_filelikes.py diff --git a/scabha/basetypes.py b/scabha/basetypes.py index 23eb16bc..f541dd4b 100644 --- a/scabha/basetypes.py +++ b/scabha/basetypes.py @@ -1,11 +1,15 @@ +from __future__ import annotations from dataclasses import field, dataclass from collections import OrderedDict -from typing import List, Union, get_args, get_origin +from typing import List, Union, get_args, get_origin, Any import os.path import re from .exceptions import UnsetError from itertools import zip_longest -from typeguard import check_type, TypeCheckError +from typeguard import ( + check_type, TypeCheckError, TypeCheckerCallable, TypeCheckMemo, checker_lookup_functions +) +from inspect import isclass def EmptyDictDefault(): @@ -118,11 +122,21 @@ def is_file_list_type(dtype): return any(dtype == List[t] for t in FILE_TYPES) -class Skip(object): - def iterate_samples(self, collection): - return () +def check_filelike(value: Any, origin_type: Any, args: tuple[Any, ...], memo: TypeCheckMemo) -> None: + """Custom checker for filelike objects. Currently checks for strings.""" + if not isinstance(value, str): + raise TypeCheckError(f'{value} is not compatible with URI or its subclasses.') +def filelike_lookup(origin_type: Any, args: tuple[Any, ...], extras: tuple[Any, ...]) -> TypeCheckerCallable | None: + """Lookup the custom checker for filelike objects.""" + if isclass(origin_type) and issubclass(origin_type, URI): + return check_filelike + + return None + +checker_lookup_functions.append(filelike_lookup) # Register custom type checker. + def get_filelikes(dtype, value, filelikes=None): """Recursively recover all filelike elements from a composite dtype.""" @@ -152,9 +166,9 @@ def get_filelikes(dtype, value, filelikes=None): return filelikes # This is a special case for tuples of arbitrary - # length i.e. list-like behaviour. - if ... in args: - args = tuple([a for a in args if a != ...]) + # length i.e. list-like behaviour. We can simply + # strip out the Ellipsis. + args = tuple([arg for arg in args if arg != ...]) for dt, v in zip_longest(args, value, fillvalue=args[0]): filelikes = get_filelikes(dt, v, filelikes) @@ -162,11 +176,9 @@ def get_filelikes(dtype, value, filelikes=None): elif origin is Union: for dt in args: - try: - # Do not check collection member types. - check_type(value, dt, collection_check_strategy=Skip()) - except TypeCheckError: + check_type(value, dt) + except TypeCheckError: # Value doesn't match dtype - incorrect branch. continue filelikes = get_filelikes(dt, value, filelikes) diff --git a/tests/scabha_tests/test_filelikes.py b/tests/scabha_tests/test_filelikes.py new file mode 100644 index 00000000..b3697d92 --- /dev/null +++ b/tests/scabha_tests/test_filelikes.py @@ -0,0 +1,42 @@ +from scabha.basetypes import get_filelikes, File, URI, Directory, MS +from typing import Dict, List, Set, Tuple, Union, Optional +import pytest + + +@pytest.fixture(scope="module", params=[File, URI, Directory, MS]) +def templates(request): + + ft = request.param + + TEMPLATES = ( + (Tuple, (), set()), + (Tuple[int, ...], [1, 2], set()), + (Tuple[ft, ...], ("foo", "bar"), {"foo", "bar"}), + (Tuple[ft, str], ("foo", "bar"), {"foo"}), + (Dict[str, int], {"a": 1, "b": 2}, set()), + (Dict[str, ft], {"a": "foo", "b": "bar"}, {"foo", "bar"}), + (Dict[ft, str], {"foo": "a", "bar": "b"}, {"foo", "bar"}), + (List[ft], [], set()), + (List[int], [1, 2], set()), + (List[ft], ["foo", "bar"], {"foo", "bar"}), + (Set[ft], set(), set()), + (Set[int], {1, 2}, set()), + (Set[ft], {"foo", "bar"}, {"foo", "bar"}), + (Union[str, List[ft]], "foo", set()), + (Union[str, List[ft]], ["foo"], {"foo"}), + (Union[str, Tuple[ft]], "foo", set()), + (Union[str, Tuple[ft]], ("foo",), {"foo"}), + (Optional[ft], None, set()), + (Optional[ft], "foo", {"foo"}), + (Optional[Union[ft, int]], 1, set()), + (Optional[Union[ft, int]], "foo", {"foo"}), + (Dict[str, Tuple[ft, str]], {"a": ("foo", "bar")}, {"foo"}) + ) + + return TEMPLATES + + +def test_get_filelikes(templates): + + for dt, v, res in templates: + assert get_filelikes(dt, v) == res, f"Failed for dtype {dt} and value {v}." From b3f8ee07753e2f9a4452cdb12aec5325a76793fe Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 27 Mar 2024 11:30:32 +0200 Subject: [PATCH 5/6] added backend.singularity.bind_dirs option --- stimela/backends/singularity.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/stimela/backends/singularity.py b/stimela/backends/singularity.py index 54d515e7..79d1a8cc 100644 --- a/stimela/backends/singularity.py +++ b/stimela/backends/singularity.py @@ -1,15 +1,14 @@ import subprocess import os -import re import logging -from stimela import utils +from enum import Enum import stimela from shutil import which from dataclasses import dataclass from omegaconf import OmegaConf from typing import Dict, List, Any, Optional, Callable from contextlib import ExitStack -from scabha.basetypes import EmptyListDefault +from scabha.basetypes import EmptyDictDefault import datetime from stimela.utils.xrun_asyncio import xrun @@ -17,6 +16,9 @@ from . import native +ReadWriteMode = Enum("ReadWriteMode", "ro rw", module=__name__) + + @dataclass class SingularityBackendOptions(object): enable: bool = True @@ -26,6 +28,8 @@ class SingularityBackendOptions(object): executable: Optional[str] = None remote_only: bool = False # if True, won't look for singularity on local system -- useful in combination with slurm wrapper + # optional extra bindings + bind_dirs: Dict[str, ReadWriteMode] = EmptyDictDefault() # @dataclass # class EmptyVolume(object): # name: str @@ -250,6 +254,10 @@ def run(cab: 'stimela.kitchen.cab.Cab', params: Dict[str, Any], fqname: str, # initial set of mounts has cwd as read-write mounts = {cwd: True} + # add extra binds + for path, rw in backend.singularity.bind_dirs.items(): + mounts[path] = mounts.get(path, False) or (rw == ReadWriteMode.rw) + # get extra required filesystem bindings resolve_required_mounts(mounts, params, cab.inputs, cab.outputs) From f3ae235a5ca94502087638b8fd26a34aba240302 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 27 Mar 2024 11:33:04 +0200 Subject: [PATCH 6/6] added ~ support to bind paths --- stimela/backends/singularity.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stimela/backends/singularity.py b/stimela/backends/singularity.py index 79d1a8cc..97700393 100644 --- a/stimela/backends/singularity.py +++ b/stimela/backends/singularity.py @@ -256,6 +256,7 @@ def run(cab: 'stimela.kitchen.cab.Cab', params: Dict[str, Any], fqname: str, mounts = {cwd: True} # add extra binds for path, rw in backend.singularity.bind_dirs.items(): + path = os.path.expanduser(path) mounts[path] = mounts.get(path, False) or (rw == ReadWriteMode.rw) # get extra required filesystem bindings