|
| 1 | +import logging |
| 2 | +import shutil |
| 3 | +from pathlib import Path |
| 4 | +from typing import List, Optional |
| 5 | + |
| 6 | +import pandas as pd |
| 7 | +import xtgeo |
| 8 | + |
| 9 | +from webviz_subsurface._utils.enum_shim import StrEnum |
| 10 | +from webviz_subsurface._utils.perf_timer import PerfTimer |
| 11 | + |
| 12 | +from ._polygon_discovery import PolygonsFileInfo |
| 13 | +from .ensemble_polygon_provider import ( |
| 14 | + EnsemblePolygonProvider, |
| 15 | + PolygonsAddress, |
| 16 | + SimulatedPolygonsAddress, |
| 17 | +) |
| 18 | + |
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)

# Sub-directory (relative to a provider's directory) that holds the copied
# simulated polygon files.
REL_SIM_DIR = "sim"
| 22 | + |
| 23 | + |
# pylint: disable=too-few-public-methods
class Col:
    """Column names of the polygon inventory DataFrame.

    The inventory is built with these keys when the backing store is written
    and is queried with the same keys when polygons are looked up.
    """

    # Kind of polygon described by the row
    TYPE = "type"
    # Realization number
    REAL = "real"
    # Polygon attribute
    ATTRIBUTE = "attribute"
    # Polygon name
    NAME = "name"
    # Path the polygon file was copied from
    ORIGINAL_PATH = "original_path"
    # Path of the copied file, relative to the provider directory
    REL_PATH = "rel_path"
| 32 | + |
| 33 | + |
class PolygonType(StrEnum):
    """Kinds of polygons that can be recorded in the inventory's type column."""

    SIMULATED = "simulated"
    # NOTE(review): "HAZARDUOUS" looks like a misspelling of "hazardous"; it is
    # kept as-is because both the member name and its value are part of the
    # interface.
    HAZARDUOUS_BOUNDARY = "hazarduous_boundary"
    CONTAINMENT_BOUNDARY = "containment_boundary"
| 38 | + |
| 39 | + |
class ProviderImplFile(EnsemblePolygonProvider):
    """Polygon provider that serves polygons from files in a backing store.

    The backing store is a directory containing copies of the polygon files
    plus a parquet inventory (``polygons_inventory.parquet``) describing them.
    """

    def __init__(
        self,
        provider_id: str,
        provider_dir: Path,
        polygon_inventory_df: pd.DataFrame,
    ) -> None:
        """Create a provider over an already loaded inventory.

        Args:
            provider_id: Identifier returned by `provider_id()`.
            provider_dir: Directory that the inventory's relative paths refer to.
            polygon_inventory_df: Inventory with the columns named in `Col`.
        """
        self._provider_id = provider_id
        self._provider_dir = provider_dir
        self._inventory_df = polygon_inventory_df

    @staticmethod
    # pylint: disable=too-many-locals
    def write_backing_store(
        storage_dir: Path,
        storage_key: str,
        sim_polygons: List[PolygonsFileInfo],
    ) -> None:
        """Copy polygon files into the backing store and write their inventory.

        Args:
            storage_dir: Root directory of the storage.
            storage_key: Name of the sub-directory that receives this
                provider's data.
            sim_polygons: Simulated polygon files to copy and register.
        """
        timer = PerfTimer()

        # All data for this provider will be stored inside a sub-directory
        # given by the storage key
        provider_dir = storage_dir / storage_key
        LOGGER.debug(f"Writing polygon backing store to: {provider_dir}")
        provider_dir.mkdir(parents=True, exist_ok=True)
        (provider_dir / REL_SIM_DIR).mkdir(parents=True, exist_ok=True)

        # One entry per polygon file; the lists become the inventory columns
        type_arr: List[PolygonType] = []
        real_arr: List[int] = []
        attribute_arr: List[str] = []
        name_arr: List[str] = []
        rel_path_arr: List[str] = []
        original_path_arr: List[str] = []

        for polygon_info in sim_polygons:
            rel_path_in_store = _compose_rel_sim_polygons_path(
                real=polygon_info.real,
                attribute=polygon_info.attribute,
                name=polygon_info.name,
                extension=Path(polygon_info.path).suffix,
            )
            type_arr.append(PolygonType.SIMULATED)
            real_arr.append(polygon_info.real)
            attribute_arr.append(polygon_info.attribute)
            name_arr.append(polygon_info.name)
            rel_path_arr.append(str(rel_path_in_store))
            original_path_arr.append(polygon_info.path)

        LOGGER.debug(f"Copying {len(original_path_arr)} polygons into backing store...")
        timer.lap_s()
        _copy_polygons_into_provider_dir(original_path_arr, rel_path_arr, provider_dir)
        et_copy_s = timer.lap_s()

        polygons_inventory_df = pd.DataFrame(
            {
                Col.TYPE: type_arr,
                Col.REAL: real_arr,
                Col.ATTRIBUTE: attribute_arr,
                Col.NAME: name_arr,
                Col.REL_PATH: rel_path_arr,
                Col.ORIGINAL_PATH: original_path_arr,
            }
        )

        parquet_file_name = provider_dir / "polygons_inventory.parquet"
        polygons_inventory_df.to_parquet(path=parquet_file_name)

        LOGGER.debug(
            f"Wrote polygon backing store in: {timer.elapsed_s():.2f}s ("
            f"copy={et_copy_s:.2f}s)"
        )

    @staticmethod
    def from_backing_store(
        storage_dir: Path,
        storage_key: str,
    ) -> Optional["ProviderImplFile"]:
        """Load a provider from an existing backing store.

        Args:
            storage_dir: Root directory of the storage.
            storage_key: Name of the provider's sub-directory; also used as
                the provider id.

        Returns:
            The provider, or None when the inventory parquet file is not found.
        """
        provider_dir = storage_dir / storage_key
        parquet_file_name = provider_dir / "polygons_inventory.parquet"

        # Only the read is guarded, so unrelated errors are not masked
        try:
            polygons_inventory_df = pd.read_parquet(path=parquet_file_name)
        except FileNotFoundError:
            return None

        return ProviderImplFile(storage_key, provider_dir, polygons_inventory_df)

    def provider_id(self) -> str:
        """Return the id this provider was created with."""
        return self._provider_id

    def attributes(self) -> List[str]:
        """Return the sorted, unique attribute values found in the inventory."""
        return sorted(self._inventory_df[Col.ATTRIBUTE].unique())

    def fault_polygons_names_for_attribute(self, polygons_attribute: str) -> List[str]:
        """Return the sorted, unique names of the rows with the given attribute."""
        has_attribute = self._inventory_df[Col.ATTRIBUTE] == polygons_attribute
        return sorted(self._inventory_df.loc[has_attribute][Col.NAME].unique())

    def realizations(self) -> List[int]:
        """Return the sorted, unique, non-negative realization numbers."""
        unique_reals = self._inventory_df[Col.REAL].unique()

        # Sort and strip out any negative entries (e.g. real == -1). The
        # values are converted to built-in ints so the result matches the
        # annotated return type rather than exposing NumPy scalars.
        return sorted(int(r) for r in unique_reals if r >= 0)

    def get_polygons(
        self,
        address: PolygonsAddress,
    ) -> Optional[xtgeo.Polygons]:
        """Return the polygons for the given address.

        Returns:
            The polygons, or None when no matching polygons are found.

        Raises:
            TypeError: If the address is not a SimulatedPolygonsAddress.
        """
        if isinstance(address, SimulatedPolygonsAddress):
            return self._get_simulated_polygons(address)

        raise TypeError("Unknown type of fault polygons address")

    def _get_simulated_polygons(
        self, address: SimulatedPolygonsAddress
    ) -> Optional[xtgeo.Polygons]:
        """Returns a Xtgeo polygons instance for a single realization.

        Returns None when no file matches the address. When several files
        match, a warning is logged and the first one is used.
        """

        timer = PerfTimer()

        polygons_fns: List[Path] = self._locate_simulated_polygons(
            attribute=address.attribute,
            name=address.name,
            realizations=[address.realization],
        )

        if not polygons_fns:
            LOGGER.warning(f"No simulated polygons found for {address}")
            return None
        if len(polygons_fns) > 1:
            LOGGER.warning(
                f"Multiple simulated polygons found for: {address}. "
                "Returning first fault polygons."
            )

        polygons_fn = polygons_fns[0]
        # CSV files are read with pandas; everything else is left to xtgeo
        if polygons_fn.suffix == ".csv":
            polygons = xtgeo.Polygons(pd.read_csv(polygons_fn))
        else:
            polygons = xtgeo.polygons_from_file(polygons_fn)

        LOGGER.debug(f"Loaded simulated fault polygons in: {timer.elapsed_s():.2f}s")

        return polygons

    def _locate_simulated_polygons(
        self, attribute: str, name: str, realizations: List[int]
    ) -> List[Path]:
        """Returns list of file names matching the specified filter criteria"""
        df = self._inventory_df.loc[
            self._inventory_df[Col.TYPE] == PolygonType.SIMULATED
        ]

        df = df.loc[
            (df[Col.ATTRIBUTE] == attribute)
            & (df[Col.NAME] == name)
            & (df[Col.REAL].isin(realizations))
        ]

        # Inventory paths are relative to the provider directory
        return [self._provider_dir / rel_path for rel_path in df[Col.REL_PATH]]
| 202 | + |
| 203 | + |
def _copy_polygons_into_provider_dir(
    original_path_arr: List[str],
    rel_path_arr: List[str],
    provider_dir: Path,
) -> None:
    """Copy each source file to its paired destination below provider_dir.

    The two lists are walked in lockstep: the i-th entry of original_path_arr
    is copied to provider_dir joined with the i-th entry of rel_path_arr.
    Files are copied one at a time, in order.
    """
    destinations = (provider_dir / rel_path for rel_path in rel_path_arr)
    for source, destination in zip(original_path_arr, destinations):
        shutil.copyfile(source, destination)
| 216 | + |
| 217 | + |
def _compose_rel_sim_polygons_path(
    real: int,
    attribute: str,
    name: str,
    extension: str,
) -> Path:
    """Build the path of a simulated polygons file, relative to the provider directory.

    The file name has the form ``<real>--<name>--<attribute><extension>`` and
    is placed inside the REL_SIM_DIR sub-directory.
    """
    stem = f"{real}--{name}--{attribute}"
    return Path(REL_SIM_DIR, f"{stem}{extension}")
0 commit comments