diff --git a/cupid/ploomber.py b/cupid/ploomber.py
new file mode 100644
index 00000000..430f0f92
--- /dev/null
+++ b/cupid/ploomber.py
@@ -0,0 +1,176 @@
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+from subprocess import PIPE
+
+from ploomber.tasks import ScriptRunner
+
+
+def _python_bin():
+    """
+    Get the path to the Python executable; return 'python' if it cannot
+    be determined
+    """
+    executable = sys.executable
+    return executable if executable else "python"
+
+
+def _run_script_in_subprocess(interpreter, path, cwd, env):
+    if isinstance(interpreter, str):
+        res = subprocess.run([interpreter, str(path)], cwd=cwd, env=env, stderr=PIPE)
+    else:
+        res = subprocess.run(interpreter + [str(path)], cwd=cwd, env=env, stderr=PIPE)
+
+    if res.returncode:
+        stderr = res.stderr.decode()
+
+        if "SyntaxError" in stderr:
+            stderr += (
+                "(Note: IPython magics are not supported in "
+                "ScriptRunner, remove them or use the regular "
+                "NotebookRunner)"
+            )
+
+        raise RuntimeError(f"Error while executing ScriptRunner:\n{stderr}")
+
+
+class CUPiDScriptRunner(ScriptRunner):
+    """
+    Similar to NotebookRunner, except it uses Python to run the code
+    instead of papermill, so it does not generate an output notebook. Like
+    NotebookRunner, it works by injecting a cell into the source code.
+    Source can be a ``.py`` script or an ``.ipynb`` notebook.
+    **Does not support magics.**
+
+    Parameters
+    ----------
+    source: str or pathlib.Path
+        Script source. If str, the content is interpreted as the actual
+        script; if pathlib.Path, the content of the file is loaded. When
+        loading from a str, ext_in must be passed
+    product: ploomber.File
+        The output file
+    dag: ploomber.DAG
+        A DAG to add this task to
+    kernelspec_name: str, optional
+        Name of a conda environment to run the script in (the script is
+        executed via ``conda run -n <kernelspec_name> python``). Defaults
+        to None, which uses the current Python interpreter
+    name: str, optional
+        A str to identify this task. Should not already exist in the dag
+    params: dict, optional
+        Script parameters. These are passed as the "parameters" argument
+        to the papermill.execute_notebook function; by default, "product"
+        and "upstream" are included
+    ext_in: str, optional
+        Source extension. Required if loading from a str. If source is a
+        ``pathlib.Path``, the extension from the file is used.
+    static_analysis : ('disabled', 'regular', 'strict'), default='regular'
+        Check for various errors in the script. In 'regular' mode, it aborts
+        execution if the notebook has syntax issues, or similar problems that
+        would cause the code to break if executed. In 'strict' mode, it
+        performs the same checks but raises an issue before starting execution
+        of any task; furthermore, it verifies that the parameters cell and
+        the params passed to the notebook match, thus making the script
+        behave like a function with a signature.
+    local_execution : bool, optional
+        Change working directory to be the parent of the script source.
+        Defaults to False.
+
+    Examples
+    --------
+
+    Spec API:
+
+    .. code-block:: yaml
+        :class: text-editor
+        :name: pipeline-yaml
+
+        tasks:
+          - source: script.py
+            class: ScriptRunner
+            product:
+              data: data.csv
+              another: another.csv
+
+    Python API:
+
+    >>> from pathlib import Path
+    >>> from ploomber import DAG
+    >>> from cupid.ploomber import CUPiDScriptRunner
+    >>> from ploomber.products import File
+    >>> dag = DAG()
+    >>> product = {'data': File('data.csv'), 'another': File('another.csv')}
+    >>> _ = CUPiDScriptRunner(Path('script.py'), product, dag=dag)
+    >>> _ = dag.build()
+    """
+
+    def __init__(
+        self,
+        source,
+        product,
+        dag,
+        kernelspec_name=None,
+        name=None,
+        params=None,
+        ext_in=None,
+        static_analysis="regular",
+        local_execution=False,
+    ):
+        self.kernelspec_name = kernelspec_name
+        self.ext_in = ext_in
+
+        kwargs = dict(hot_reload=dag._params.hot_reload)
+        self._source = ScriptRunner._init_source(
+            source,
+            kwargs,
+            ext_in,
+            static_analysis,
+            False,
+            False,
+        )
+        self.local_execution = local_execution
+        # skip ScriptRunner.__init__ and call its parent's __init__ directly
+        super(ScriptRunner, self).__init__(product, dag, name, params)
+
+    def run(self):
+        # regular mode: raise but do not check signature
+        # strict mode: checked at render time
+        if self.static_analysis == "regular":
+            self.source._check_notebook(raise_=True, check_signature=False)
+
+        fd, tmp = tempfile.mkstemp(".py")
+        os.close(fd)
+
+        # concatenate the code cells into a plain Python script
+        code = "\n\n".join(
+            [
+                c["source"]
+                for c in self.source.nb_obj_rendered.cells
+                if c["cell_type"] == "code"
+            ],
+        )
+
+        cwd = str(self.source.loc.parent.resolve())
+        orig_env = os.environ.copy()
+
+        if "PYTHONPATH" not in orig_env:
+            orig_env["PYTHONPATH"] = cwd
+        else:
+            orig_env["PYTHONPATH"] += os.pathsep + cwd
+
+        tmp = Path(tmp)
+        tmp.write_text(code)
+
+        if self.source.language == "python":
+            interpreter = _python_bin()
+            if self.kernelspec_name:
+                interpreter = f"conda run -n {self.kernelspec_name} python".split()
+        elif self.source.language == "r":
+            interpreter = "Rscript"
+        else:
+            raise ValueError("ScriptRunner only works with Python and R scripts")
+
+        try:
+            _run_script_in_subprocess(interpreter, tmp, cwd, orig_env)
+        except Exception as e:
+            raise RuntimeError(
+                f"Error when executing task {self.name!r}.",
+            ) from e  # should be TaskBuildError
+        finally:
+            tmp.unlink()
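
Note: a minimal usage sketch of CUPiDScriptRunner (the script name, product
path, and task name below are placeholders, and it assumes a conda environment
named cupid-analysis exists; omit kernelspec_name to run with the current
interpreter):

    from pathlib import Path

    from ploomber import DAG
    from ploomber.products import File

    from cupid.ploomber import CUPiDScriptRunner

    dag = DAG()
    task = CUPiDScriptRunner(
        Path("script.py"),  # placeholder script
        File("out.png"),  # placeholder product
        dag,
        kernelspec_name="cupid-analysis",  # runs via `conda run -n cupid-analysis python`
        name="example_task",
    )
    _ = dag.build()
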
diff --git a/cupid/util.py b/cupid/util.py
index 2d5b9cc6..4e6b7a61 100644
--- a/cupid/util.py
+++ b/cupid/util.py
@@ -27,6 +27,8 @@
 from jinja2 import Template
 from papermill.engines import NBClientEngine
 
+from cupid.ploomber import CUPiDScriptRunner
+
 
 class MarkdownJinjaEngine(NBClientEngine):
     """Class for using the Jinja Engine to run notebooks"""
@@ -272,11 +274,12 @@ def create_ploomber_script_task(
     if cat_path is not None:
         parms_in["path_to_cat"] = cat_path
 
-    task = ploomber.tasks.ScriptRunner(
+    task = CUPiDScriptRunner(
         Path(input_path),
         ploomber.products.File(info["product"]),
         dag,
         params=parms_in,
+        kernelspec_name=info["kernel_name"],
         name=output_name,
     )
 
diff --git a/examples/coupled_model/config2.yml b/examples/coupled_model/config2.yml
new file mode 100644
index 00000000..90035142
--- /dev/null
+++ b/examples/coupled_model/config2.yml
@@ -0,0 +1,163 @@
+
+################## SETUP ##################
+
+################
+# Data Sources #
+################
+data_sources:
+  # sname is any string used as a nickname for this configuration. It will be
+  ### used as the name of the folder your computed notebooks are put in
+  sname: quick-run2
+
+  # run_dir is the path to the folder you want
+  ### all the files associated with this configuration
+  ### to be created in
+  run_dir: .
+
+  # nb_path_root is the path to the folder where cupid
+  ### will look for your template notebooks. It doesn't have to
+  ### be inside run_dir, or be specific to this project, as
+  ### long as the notebooks are there
+  nb_path_root: ../nblibrary
+
+######################
+# Computation Config #
+######################
+
+computation_config:
+
+  # default_kernel_name is the name of the environment that
+  ### the notebooks in this configuration will be run in by default.
+  ### It must already be installed on your machine. You can also
+  ### specify a different environment than the default for any
+  ### notebook in NOTEBOOK CONFIG
+
+  default_kernel_name: cupid-analysis
+
+
+############# NOTEBOOK CONFIG #############
+
+############################
+# Notebooks and Parameters #
+############################
+
+# All parameters under global_params get passed to all the notebooks
+
+global_params:
+  CESM_output_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing
+  lc_kwargs:
+    threads_per_worker: 1
+
+timeseries:
+  num_procs: 8
+  ts_done: [False]
+  overwrite_ts: [False]
+  case_name: 'b.e23_alpha16b.BLT1850.ne30_t232.054'
+
+  atm:
+    vars: ['ACTNI', 'ACTNL', 'ACTREI', 'ACTREL', 'AODDUST']
+    derive_vars: []  # {'PRECT':['PRECL','PRECC'], 'RESTOM':['FLNT','FSNT']}
+    hist_str: 'h0'
+    start_years: [2]
+    end_years: [102]
+    level: 'lev'
+
+  lnd:
+    vars: ['ALTMAX', 'COST_NACTIVE', 'DENIT', 'EFLX_DYNBAL']  # ['process_all']
+    derive_vars: []
+    hist_str: 'h0'
+    start_years: [2]
+    end_years: [102]
+    level: 'lev'
+
+  ocn:
+    vars: ['taux', 'tauy']  # ['process_all']
+    derive_vars: []
+    hist_str: 'h.frc'
+    start_years: [2]
+    end_years: [102]
+    level: 'lev'
+
+  ice:
+    vars: ['hi', 'hs', 'snowfrac', 'Tsfc']  # ['process_all']
+    derive_vars: []
+    hist_str: 'h'
+    start_years: [2]
+    end_years: [102]
+    level: 'lev'
+
+  glc:
+    vars: ['usurf', 'topg']  # ['process_all']
+    derive_vars: []
+    hist_str: 'initial_hist'
+    start_years: [2]
+    end_years: [102]
+    level: 'lev'
+
+compute_notebooks:
+
+  # This is where all the notebooks you want run and their
+  ### parameters are specified. Several examples of different
+  ### types of notebooks are provided.
+
+  # Notebooks are grouped by component; each key (here index and
+  ### adf_quick_run) is the name of a notebook from nb_path_root,
+  ### minus the .ipynb
+  infrastructure:
+    index:
+      parameter_groups:
+        none: {}
+
+  atm:
+    adf_quick_run:
+      parameter_groups:
+        none:
+          adf_path: ../../../externals/ADF
+          config_path: .
+          config_fil_str: "config_f.cam6_3_119.FLTHIST_ne30.r328_gamma0.33_soae.001.yaml"
+
+compute_scripts:
+
+  # Scripts are configured like notebooks, but run as plain Python via
+  ### CUPiDScriptRunner; kernel_name selects the conda environment and
+  ### product names the file the script produces
+  ice:
+    divergence:
+      kernel_name: cupid-analysis
+      parameter_groups:
+        none:
+          dummy: thpth
+      product: vector.png
+
+########### JUPYTER BOOK CONFIG ###########
+
+##################################
+# Jupyter Book Table of Contents #
+##################################
+book_toc:
+
+  # See https://jupyterbook.org/en/stable/structure/configure.html for
+  ## complete documentation of Jupyter book construction options
+
+  format: jb-book
+
+  # All filenames are notebook filenames without the .ipynb, as above
+
+  root: index  # root is the notebook that will be the homepage for the book
+  parts:
+
+    # Parts group notebooks into different sections in the Jupyter book
+    ### table of contents, so you can organize different parts of your project.
+
+    - caption: Atmosphere
+
+      # Each chapter is the name of one of the notebooks that you executed
+      ### in compute_notebooks above, also without .ipynb
+      chapters:
+        - file: adf_quick_run
+
+#####################################
+# Keys for Jupyter Book _config.yml #
+#####################################
+book_config_keys:
+
+  title: Example project  # Title of your jupyter book
+
+  # Other keys can be added here, see https://jupyterbook.org/en/stable/customize/config.html
+  ### for many more options
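
Note: for orientation, the compute_scripts entry above is what reaches
create_ploomber_script_task in cupid/util.py. A rough sketch of the mapping it
receives follows — the exact structure is assembled elsewhere in CUPiD and may
differ; kernel_name and product are the two keys the diff above actually reads:

    # hypothetical shape of `info` for the ice/divergence entry above
    info = {
        "kernel_name": "cupid-analysis",  # becomes kernelspec_name for CUPiDScriptRunner
        "product": "vector.png",  # wrapped in ploomber.products.File
        "parameter_groups": {"none": {"dummy": "thpth"}},  # assumed; passed via params
    }
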
diff --git a/examples/nblibrary/ice/divergence.py b/examples/nblibrary/ice/divergence.py
new file mode 100644
index 00000000..286fde96
--- /dev/null
+++ b/examples/nblibrary/ice/divergence.py
@@ -0,0 +1,110 @@
+from __future__ import annotations
+
+import os
+
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+import matplotlib.path as mpath
+import matplotlib.pyplot as plt
+import numpy as np
+import xarray as xr
+from matplotlib.gridspec import GridSpec
+
+# confirm which conda environment the script is running in
+print(os.environ["CONDA_DEFAULT_ENV"])
+
+
+def pop_div(x_e, y_n, dxt, dyt, tarea):
+    """Finite-difference divergence of a vector field on the POP grid."""
+    x_e = x_e * dyt
+    y_n = y_n * dxt
+    x_w = x_e.roll(nlon=1, roll_coords=False)
+    y_s = y_n.shift(nlat=1)
+    div = (x_e - x_w + y_n - y_s) / tarea
+    return div
+
+
+dummy = None
+# + tags=["parameters"]
+# add default values for parameters here
+# +
+print(dummy)
+
+case1 = "g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.4p2z.002branch"
+var1 = "U1_1"
+var2 = "V1_1"
+vartitle = "POP Surface Current"
+intv = 3
+
+climo_path = (
+    "/glade/derecho/scratch/dbailey/archive/" + case1 + "/ocn/proc/tseries/day_1/"
+)
+
+ds1 = xr.open_dataset(
+    climo_path + case1 + ".pop.h.nday1." + var1 + ".20080102-20211231.nc",
+)
+ds2 = xr.open_dataset(
+    climo_path + case1 + ".pop.h.nday1." + var2 + ".20080102-20211231.nc",
+)
+
+grid = xr.open_dataset("/glade/campaign/cesm/community/omwg/grids/gx1v7_grid.nc")
+TLAT = grid["ULAT"]
+TLON = grid["ULONG"]
+angle = grid["ANGLE"]
+tarea = grid["TAREA"]
+dxt = grid["DXT"]
+dyt = grid["DYT"]
+
+# use only the last time step
+uvel1 = ds1[var1][-1::, :, :].mean(axis=0)
+vvel1 = ds2[var2][-1::, :, :].mean(axis=0)
+
+# rotate velocities from the POP grid to zonal/meridional components
+uvel_rot1 = uvel1 * np.cos(angle) - vvel1 * np.sin(angle)
+vvel_rot1 = uvel1 * np.sin(angle) + vvel1 * np.cos(angle)
+# uvel_rot2 = uvel2 * np.cos(angle) - vvel2 * np.sin(angle)
+# vvel_rot2 = uvel2 * np.sin(angle) + vvel2 * np.cos(angle)
+
+div = pop_div(uvel1, vvel1, dxt, dyt, tarea)
+
+print(div)
+
+# var_diff = var1 - var2
+# var_std = var_diff.std()
+# var_max = 5.
+# var_min = 0.
+
+# make a circular boundary for polar stereographic plots
+theta = np.linspace(0, 2 * np.pi, 100)
+center, radius = [0.5, 0.5], 0.5
+verts = np.vstack([np.sin(theta), np.cos(theta)]).T
+circle = mpath.Path(verts * radius + center)
+
+# set up the figure with a North Polar Stereographic projection
+fig = plt.figure(figsize=(10, 10))
+gs = GridSpec(1, 1)
+
+ax = fig.add_subplot(gs[0, 0], projection=ccrs.NorthPolarStereo())
+ax.set_boundary(circle, transform=ax.transAxes)
+ax.add_feature(cfeature.LAND, zorder=100, edgecolor="k")
+
+# set the latitude / longitude boundaries of the plot
+ax.set_extent([0.005, 360, 90, 55], crs=ccrs.PlateCarree())
+
+this = ax.pcolormesh(
+    TLON[:, :].values,
+    TLAT[:, :].values,
+    div.values,
+    cmap="rainbow",
+    transform=ccrs.PlateCarree(),
+)
+
+this = ax.quiver(
+    TLON[::intv, ::intv].values,
+    TLAT[::intv, ::intv].values,
+    uvel_rot1[::intv, ::intv].values,
+    vvel_rot1[::intv, ::intv].values,
+    transform=ccrs.PlateCarree(),
+)
+
+plt.savefig("vector.png")
+
+# + [markdown]
+# plt.colorbar(this,orientation='vertical',fraction=0.04,pad=0.01)
+# plt.title(vartitle,fontsize=10)
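
Note: a quick sanity check for pop_div on a synthetic field (a sketch on a
uniform grid of unit-size cells, not POP output): a velocity that grows by one
per cell in x should give a divergence of one in the interior.

    import numpy as np
    import xarray as xr

    # pop_div as defined in divergence.py above

    dims = ("nlat", "nlon")
    ones = xr.DataArray(np.ones((4, 4)), dims=dims)  # unit dxt, dyt, tarea

    u = xr.DataArray(np.tile(np.arange(4.0), (4, 1)), dims=dims)  # du/dx = 1
    v = xr.zeros_like(u)

    div = pop_div(u, v, dxt=ones, dyt=ones, tarea=ones)
    print(div.values)
    # interior columns are 1.0; column 0 wraps around via roll(), and the
    # first row is NaN because shift() has no southern neighbor there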