Commit aa96d58

Merge pull request #10 from CMCC-Foundation/witoil_copernicus_submod

Updating download and config files

santibravocmcc authored Feb 6, 2025
2 parents 9caa119 + b5c32e2 commit aa96d58
Showing 3 changed files with 207 additions and 76 deletions.
7 changes: 0 additions & 7 deletions config.toml
@@ -8,11 +8,7 @@
spill_lon = [35.90] # lon of oil spill (deg E)
spill_duration = [0.0] # duration of oil spill in HOURS; = 0.0 for instantaneous release
spill_rate = [27.78] # spill rate TONS/HOUR
slick_age = [0.0] # age of oil slick in HOURS
oil = [28] # either oil API gravity (number) or oil name (string), e.g. ["Ragusa"]; the name must match exactly
area_spill = false
area_vertex = false # three nested list levels. 1st: all slicks; 2nd: one slick; 3rd: coordinates of each vertex in that slick
multiple_slick = false
advanced_parameters = false # if = true, user must provide parameters.toml file
advanced_parameters_path = "WITOIL_iMagine/src/parameters.toml" # this path should be provided only if "advanced_parameters" = true
[download]
@@ -34,9 +30,6 @@
[input_files.metoce]
oce_data_path = false # to provide if download_curr = false
met_data_path = false # to provide if download_wind = false
[input_files.shapefile]
shape_path = false # add "path/to/shapefile" in case you want to start from shapefile.
# set shape_path = false or "none" if you do not want to start from a shapefile
[run_options]
preprocessing = true # = false if no preprocessing at all should be performed
preprocessing_metoce = true # MET/OCE data preprocessing
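Note: for readers following the config changes, a minimal sketch of how a TOML file like this is typically read from Python. The key names come from the hunks above; the loading code itself is illustrative and not part of this PR.

    import tomllib  # stdlib since Python 3.11; on older versions use `import tomli as tomllib`

    with open("config.toml", "rb") as f:  # tomllib requires binary mode
        cfg = tomllib.load(f)

    oce_path = cfg["input_files"]["metoce"]["oce_data_path"]  # TOML false -> Python False
    run_preprocessing = cfg["run_options"]["preprocessing"]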
274 changes: 206 additions & 68 deletions src/download/download_copernicus_parser.py
@@ -1,7 +1,6 @@
import copernicusmarine
import argparse
import os
-import subprocess
import datetime
import pandas as pd
import xarray as xr
@@ -26,18 +25,25 @@ def download_copernicus(
password,
):

+# Ensure start_time and end_time are timezone-aware
+start_time = start_time.tz_localize("UTC")
+end_time = end_time.tz_localize("UTC")
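Note: the localization above matters because pandas refuses to compare tz-naive and tz-aware timestamps, and the date cutoffs below are now tz-aware. A minimal illustration (not from this PR):

    import pandas as pd

    naive = pd.Timestamp("2022-06-01")
    aware = pd.Timestamp("2022-06-01", tz="UTC")
    # naive < aware  # TypeError: cannot compare tz-naive and tz-aware timestamps
    naive.tz_localize("UTC") <= aware  # comparable once both carry UTC

One caveat: `tz_localize` itself raises `TypeError` on input that is already tz-aware, so this assumes callers always pass naive timestamps.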

if region == "global":

if end_time < pd.to_datetime("2022-01-01"):
if end_time > pd.to_datetime("2021-06-30"):
if end_time < pd.to_datetime("2022-06-01").tz_localize("UTC"):
if end_time > pd.to_datetime("2021-06-30").tz_localize("UTC"):
dataset_id = "cmems_mod_glo_phy_myint_0.083deg_P1D-m"
else:
dataset_id = "cmems_mod_glo_phy_my_0.083deg_P1D-m"
output_name = output_name.format("reanalysis")
else:
dataset_id = "cmems_mod_glo_phy_anfc_0.083deg_P1D-m"
dataset_id = "cmems_mod_glo_phy_anfc_0.083deg_PT1H-m"
output_name = output_name.format("analysis")

+# Normalize the start date to be sure enough time slices of the dataset are retrieved
+start_time = start_time.normalize()
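Note: `normalize()` floors the timestamp to midnight while keeping the timezone, so a spill starting mid-day still pulls the full first day of forcing. Illustrative only:

    import pandas as pd

    pd.Timestamp("2022-06-01 14:30", tz="UTC").normalize()
    # Timestamp('2022-06-01 00:00:00+0000', tz='UTC')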

copernicusmarine.subset(
dataset_id=dataset_id,
variables=["uo", "vo", "thetao"],
@@ -53,7 +59,6 @@ def download_copernicus(
output_directory=output_path,
username=user,
password=password,
-force_download=True,
)
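Note: dropping `force_download` tracks the copernicusmarine toolbox API; as far as I recall the argument was deprecated in the 1.x series and removed in 2.0, where subsetting no longer prompts for confirmation. If both major versions had to be supported, a defensive sketch (an assumption, not part of this PR):

    import inspect
    import copernicusmarine

    extra = {}
    if "force_download" in inspect.signature(copernicusmarine.subset).parameters:
        extra["force_download"] = True  # accepted by pre-2.0 toolboxes only
    # copernicusmarine.subset(..., **extra)  # forwarded alongside the usual arguments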

# Transform to medslik standards
@@ -64,6 +69,7 @@

# Selecting only 4 layers
ds = ds.sel(depth=[0, 10, 30, 120], method="nearest")

# Modifying labels to simplify dropping temperature columns
ds["depth"] = [0, 10, 30, 120]

@@ -80,79 +86,211 @@

else:

if end_time < pd.to_datetime("2021-11-01"):
if end_time < pd.to_datetime("2022-11-01").tz_localize("UTC"):
dataset_id_curr = "med-cmcc-cur-rean-h"
dataset_id_temp = "med-cmcc-tem-rean-d"
output_name = output_name.format("reanalysis")
else:
dataset_id_curr = "cmems_mod_med_phy-cur_anfc_4.2km-2D_PT1H-m"
dataset_id_temp = "cmems_mod_med_phy-tem_anfc_4.2km-2D_PT1H-m"
output_name = output_name.format("analysis")
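Note: in the reanalysis branch the currents product is hourly (`-h`) while temperature is daily (`-d`), so once `open_mfdataset` merges the two files on time, `thetao` is defined only at the daily timestamps. A toy sketch of that alignment (illustrative, not from this PR):

    import numpy as np
    import pandas as pd
    import xarray as xr

    hourly = xr.Dataset(
        {"uo": ("time", np.zeros(48))},
        coords={"time": pd.date_range("2021-01-01", periods=48, freq="h")},
    )
    daily = xr.Dataset(
        {"thetao": ("time", np.zeros(2))},
        coords={"time": pd.date_range("2021-01-01", periods=2, freq="D")},
    )
    merged = xr.merge([hourly, daily])  # outer join on time, as open_mfdataset does
    # merged["thetao"] is NaN at every hourly stamp without a daily value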

files = []
for dataset in [dataset_id_curr, dataset_id_temp]:

if "cur" in dataset:
copernicusmarine.subset(
dataset_id=dataset_id_curr,
variables=["uo", "vo"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
minimum_depth=min_depth,
maximum_depth=max_depth,
output_filename="curr.nc",
output_directory=output_path,
username=user,
password=password,
force_download=True,
)

files.append(output_path + "curr.nc")
else:
copernicusmarine.subset(
dataset_id=dataset_id_temp,
variables=["thetao"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
output_filename="temp.nc",
output_directory=output_path,
username=user,
password=password,
force_download=True,
)

files.append(output_path + "temp.nc")
files = []
for dataset in [dataset_id_curr, dataset_id_temp]:

# Transform to medslik standards
ds = xr.open_mfdataset(files)
if "cur" in dataset:
copernicusmarine.subset(
dataset_id=dataset_id_curr,
variables=["uo", "vo"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
output_filename="curr.nc",
output_directory=output_path,
username=user,
password=password,
)
# Add depth dimension (0 m) to 2D dataset
ds = xr.open_dataset(f"{output_path}curr.nc")
ds = ds.expand_dims(depth=[0])
ds.to_netcdf(f"{output_path}curr.nc")

# Rename variables only if they exist in the dataset
ds = Utils.rename_netcdf_variables_mdk3(ds)
files.append(output_path + "curr.nc")
else:
copernicusmarine.subset(
dataset_id=dataset_id_temp,
variables=["thetao"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
minimum_depth=min_depth,
maximum_depth=max_depth,
output_filename="temp.nc",
output_directory=output_path,
username=user,
password=password,
)

# Selecting only 4 layers
try:
files.append(output_path + "temp.nc")

# Transform to medslik standards
ds = xr.open_mfdataset(files)

# Rename variables only if they exist in the dataset
ds = Utils.rename_netcdf_variables_mdk3(ds)

# Selecting only 4 layers
ds = ds.sel(depth=[0, 10, 30, 120], method="nearest")

# Modifying labels to simplify dropping temperature columns
ds["depth"] = [0, 10, 30, 120]
except:
ds = ds.expand_dims(dim={"depth": [0, 10, 30, 120]})
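Note: the bare `except:` above also swallows `KeyboardInterrupt` and unrelated bugs. If the intent is only to fall back when the 2D fields carry no depth coordinate, a narrower handler would be safer. A sketch; the exception types are my guess at what `sel` raises here, not something verified against these datasets:

    try:
        ds = ds.sel(depth=[0, 10, 30, 120], method="nearest")
        ds["depth"] = [0, 10, 30, 120]
    except (KeyError, ValueError):  # assumed: raised when no depth coordinate exists
        ds = ds.expand_dims(dim={"depth": [0, 10, 30, 120]})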

# Selecting only the relevant variables
ds = ds[["uo", "vo", "thetao"]]
# Selecting only the relevant variables
ds = ds[["uo", "vo", "thetao"]]

# saves the daily current or temperature netcdf in the case dir
ds.to_netcdf(output_name)
# saves the daily current or temperature netcdf in the case dir
ds.to_netcdf(output_name)

# remove the temporary files
temp_files = [os.path.join(output_path, "curr.nc"), os.path.join(output_path, "temp.nc")]
for temp_file in temp_files:
if os.path.exists(temp_file):
os.remove(temp_file)
# remove the temporary files
temp_files = [os.path.join(output_path, "curr.nc"), os.path.join(output_path, "temp.nc")]
for temp_file in temp_files:
if os.path.exists(temp_file):
os.remove(temp_file)

else:
if end_time < pd.to_datetime("2024-10-14").tz_localize("UTC"):
dataset_id_curr = "cmems_mod_med_phy-cur_anfc_4.2km-2D_PT1H-m"
dataset_id_temp = "cmems_mod_med_phy-tem_anfc_4.2km-2D_PT1H-m"
output_name = output_name.format("analysis")

files = []
for dataset in [dataset_id_curr, dataset_id_temp]:

if "cur" in dataset:
copernicusmarine.subset(
dataset_id=dataset_id_curr,
variables=["uo", "vo"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
output_filename="curr.nc",
output_directory=output_path,
username=user,
password=password,
)

files.append(output_path + "curr.nc")
else:
copernicusmarine.subset(
dataset_id=dataset_id_temp,
variables=["thetao"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
output_filename="temp.nc",
output_directory=output_path,
username=user,
password=password,
)

files.append(output_path + "temp.nc")

# Transform to medslik standards
ds = xr.open_mfdataset(files)

# Rename variables only if they exist in the dataset
ds = Utils.rename_netcdf_variables_mdk3(ds)

# Assigning depth dimension
ds = ds.expand_dims(dim={"depth": [0, 10, 30, 120]})

# Selecting only the relevant variables
ds = ds[["uo", "vo", "thetao"]]

# saves the daily current or temperature netcdf in the case dir
ds.to_netcdf(output_name)

# remove the temporary files
temp_files = [os.path.join(output_path, "curr.nc"), os.path.join(output_path, "temp.nc")]
for temp_file in temp_files:
if os.path.exists(temp_file):
os.remove(temp_file)

else:
dataset_id_curr = "cmems_mod_med_phy-cur_anfc_4.2km-3D_PT1H-m"
dataset_id_temp = "cmems_mod_med_phy-tem_anfc_4.2km-3D_PT1H-m"
output_name = output_name.format("analysis")

files = []
for dataset in [dataset_id_curr, dataset_id_temp]:

if "cur" in dataset:
copernicusmarine.subset(
dataset_id=dataset_id_curr,
variables=["uo", "vo"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
minimum_depth=min_depth,
maximum_depth=max_depth,
output_filename="curr.nc",
output_directory=output_path,
username=user,
password=password,
)

files.append(output_path + "curr.nc")
else:
copernicusmarine.subset(
dataset_id=dataset_id_temp,
variables=["thetao"],
minimum_longitude=min_lon,
maximum_longitude=max_lon,
minimum_latitude=min_lat,
maximum_latitude=max_lat,
start_datetime=start_time,
end_datetime=end_time,
minimum_depth=min_depth,
maximum_depth=max_depth,
output_filename="temp.nc",
output_directory=output_path,
username=user,
password=password,
)

files.append(output_path + "temp.nc")

# Transform to medslik standards
ds = xr.open_mfdataset(files)

# Rename variables only if they exist in the dataset
ds = Utils.rename_netcdf_variables_mdk3(ds)

# Selecting only 4 layers
ds = ds.sel(depth=[0, 10, 30, 120], method="nearest")

# Modifying labels to simplify dropping temperature columns
ds["depth"] = [0, 10, 30, 120]

# Selecting only the relevant variables
ds = ds[["uo", "vo", "thetao"]]

# saves the daily current or temperature netcdf in the case dir
ds.to_netcdf(output_name)

# remove the temporary files
temp_files = [os.path.join(output_path, "curr.nc"), os.path.join(output_path, "temp.nc")]
for temp_file in temp_files:
if os.path.exists(temp_file):
os.remove(temp_file)
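Note: the three Mediterranean branches now repeat the same subset / merge / clean-up sequence with only the dataset IDs and depth handling varying. A follow-up could hoist the shared call into a helper; a rough sketch assuming it lives inside `download_copernicus` and closes over the bounding box, time window and credentials (the helper name is invented):

    def _subset_to_file(dataset_id, variables, filename, **depth_kwargs):
        # Shared wrapper around copernicusmarine.subset.
        copernicusmarine.subset(
            dataset_id=dataset_id,
            variables=variables,
            minimum_longitude=min_lon,
            maximum_longitude=max_lon,
            minimum_latitude=min_lat,
            maximum_latitude=max_lat,
            start_datetime=start_time,
            end_datetime=end_time,
            output_filename=filename,
            output_directory=output_path,
            username=user,
            password=password,
            **depth_kwargs,  # pass minimum_depth/maximum_depth for 3D datasets only
        )
        return output_path + filename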
2 changes: 1 addition & 1 deletion src/download/download_era5_parser.py
@@ -29,7 +29,7 @@ def write_cds(token):
def get_era5(xmin,xmax,ymin,ymax,start_date,end_date,output_path,output_name):
server = cdsapi.Client()

-days = (end_date-start_date).days
+days = (end_date-start_date).days + 1

print(ymin,ymax,xmin,xmax)
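Note: the `+ 1` makes the request inclusive of the end date; `timedelta.days` counts the gap between dates, not the calendar days covered. Illustrative:

    from datetime import date

    start_date, end_date = date(2025, 1, 1), date(2025, 1, 3)
    (end_date - start_date).days       # 2 -> would drop the final day
    (end_date - start_date).days + 1   # 3 -> Jan 1, 2 and 3 all requested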
