mostly docstrings

fenke · Jun 27, 2024 · 25eaf60 · 25eaf60
1 parent b3c3274
commit 25eaf60
Show file tree

Hide file tree

Showing 4 changed files with 412 additions and 59 deletions.
diff --git a/corebridge/__init__.py b/corebridge/__init__.py
@@ -1 +1 @@
-__version__ = "0.2.13"
+__version__ = "0.2.14"
diff --git a/corebridge/core.py b/corebridge/core.py
@@ -19,6 +19,29 @@
 
 # %% ../nbs/00_core.ipynb 7
 def set_time_index_zone(df:pd.DataFrame, timezone):
+    """
+    Sets the time zone of the index of a pandas DataFrame.
+
+    Args:
+        df (pd.DataFrame): The DataFrame whose index time zone is to be set.
+        timezone (str): The desired time zone.
+
+    Returns:
+        pd.DataFrame: The DataFrame with its index time zone set to the specified time zone. A DatetimeIndex is also renamed to 'time'.
+
+    Raises:
+        None
+
+    Examples:
+        >>> df = pd.DataFrame({'A': [1, 2, 3]}, index=pd.DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03']))
+        >>> set_time_index_zone(df, 'Europe/Berlin')
+                     A
+        2022-01-01  1
+        2022-01-02  2
+        2022-01-03  3
+        DatetimeIndex: 3 entries, 2022-01-01 01:00:00+01:00 to 2022-01-03 01:00:00+01:00
+    """
+
     if isinstance(df.index, pd.DatetimeIndex):
         df.index.name = 'time'
         if not hasattr(df.index, 'tz')  or not df.index.tz or not df.index.tz:
@@ -29,13 +52,22 @@ def set_time_index_zone(df:pd.DataFrame, timezone):
     return df
 
 
-# %% ../nbs/00_core.ipynb 9
+# %% ../nbs/00_core.ipynb 11
 def timeseries_dataframe(
         data:typing.Union[pd.DataFrame, pd.Series, dict, np.ndarray, np.recarray], 
         timezone='UTC', 
         columnnames=None):
 
-    """Convert various tabular data formats to timeseries DataFrame"""
+    """Convert various tabular data formats to a timeseries DataFrame.
+
+    Args:
+        data (Union[pd.DataFrame, pd.Series, dict, np.ndarray, np.recarray]): The input data to be converted.
+        timezone (str, optional): The timezone to set for the index of the DataFrame. Defaults to 'UTC'.
+        columnnames (Optional[List[str]]): The column names to use for the DataFrame. Defaults to None.
+
+    Returns:
+        pd.DataFrame: The converted timeseries DataFrame with the index set to the specified timezone.
+    """
 
     if isinstance(data, pd.DataFrame):
         df = data
@@ -44,7 +76,7 @@ def timeseries_dataframe(
         df = pd.DataFrame(data)
 
     elif isinstance(data, dict):
-        # dict/mapping of individual timeseries
+        # assume a dict/mapping of individual arrays representing timeseries 
         df = pd.DataFrame({
             C:pd.Series(data=A[:,1], index=pd.DatetimeIndex(A[:,0]*1e9)) if isinstance(A, np.ndarray) else A
             for C,A in data.items()
@@ -77,14 +109,21 @@ def timeseries_dataframe(
 
     return set_time_index_zone(df, timezone)
 
-# %% ../nbs/00_core.ipynb 11
+# %% ../nbs/00_core.ipynb 14
 def timeseries_dataframe_from_datadict(
         data:dict, 
         timecolumns=None,
         recordformat='records'):
 
-    "Convert data dict to dataframe"
-
+    """
+    Converts a data dict into a pandas DataFrame based on the specified record format. 
+    Parameters:
+        - data: A dictionary containing the data to convert.
+        - timecolumns: A list of column names to be treated as time columns.
+        - recordformat: A string specifying the format of the data records ('records', 'table', 'split', 'index', 'tight'); matched case-insensitively.
+    Returns:
+        - df: A pandas DataFrame whose index is a UTC DatetimeIndex named 'time', rounded to milliseconds.
+    """
     orient = recordformat.lower()
     assert orient in ['records', 'table', 'split', 'index', 'tight']
 
@@ -104,29 +143,52 @@ def timeseries_dataframe_from_datadict(
     df.columns = list(df.columns)
     df[time_column] = pd.to_datetime(df[time_column],utc=True,format='ISO8601')
     df.set_index(time_column, inplace=True)
-    #df.index = pd.DatetimeIndex(df.index).round('ms')
+    df.index = pd.DatetimeIndex(df.index).round('ms')
 
     df.index.name = 'time'
 
     return df
 
 
-# %% ../nbs/00_core.ipynb 14
+# %% ../nbs/00_core.ipynb 17
 def pop_nan_values(data):
+    """
+    Recursively drop null entries (e.g. NaN or None) from dicts and lists.
+
+    Args:
+        data (Union[list, dict]): The data to be processed. Any other type is returned unchanged.
+
+    Returns:
+        Union[list, dict]: A new list or dict without null list items and without keys whose value is null.
+    """
+
     if isinstance(data, list):
         return [pop_nan_values(v) for v in data if pd.notnull([v]).any()]
     elif isinstance(data, dict):
         return {k:pop_nan_values(v) for k, v in data.items() if pd.notnull([v]).any()}
     else:
         return data
 
-# %% ../nbs/00_core.ipynb 15
+# %% ../nbs/00_core.ipynb 18
 def timeseries_dataframe_to_datadict(
         data:typing.Union[pd.DataFrame, pd.Series, dict], 
         recordformat:str='records', 
         timezone:str='UTC',
         popNaN:bool=False):
-
+
+    """
+    Convert a timeseries DataFrame or Series into a dictionary representation.
+
+    Args:
+        data (Union[pd.DataFrame, pd.Series, dict]): The input data to be converted. It can be a pandas DataFrame, Series, or a dictionary.
+        recordformat (str, optional): The format of the output records. Defaults to 'records'.
+        timezone (str, optional): The timezone to use for the DataFrame index. Defaults to 'UTC'.
+        popNaN (bool, optional): Whether to remove NaN values from the output dictionary. Defaults to False.
+
+    Returns:
+        Union[dict, list]: The converted representation of the input data, a dictionary or a list of dictionaries depending on the `recordformat` parameter. If `popNaN` is True and the data contains NaN values, those entries are removed from the result.
+    """
+
     orient = recordformat.lower()
 
     normalized_data = timeseries_dataframe(data, timezone=timezone)
@@ -137,18 +199,17 @@ def timeseries_dataframe_to_datadict(
         records = normalized_data.reset_index().to_dict(orient='records')
     else:
         records =  normalized_data.to_dict(orient=orient)
-
 
     if popNaN and normalized_data.isna().any(axis=None):
-        #return pop_nan_values(records)
-        return [ {k:v for k,v in m.items() if pd.notnull(v)} for m in records]
-    else:
-        return records
+            return pop_nan_values(records)
+
+    return records    
 
 
+# %% ../nbs/00_core.ipynb 25
+#def interpolate_timeseries(sampler, period, method_args):
 
 
-# %% ../nbs/00_core.ipynb 22
 ResamplerMethods = dict(
     count=lambda R: R.count(),
     median=lambda R: R.median(),
@@ -158,18 +219,26 @@ def timeseries_dataframe_to_datadict(
     sum=lambda R: R.sum(),
     std=lambda R: R.std(),
     var=lambda R: R.var(),
-    cumsum=lambda R: R.cumsum(),
-    cummax=lambda R: R.cummax(),
-    cummin=lambda R: R.cummin(),
-
+    nearest=lambda R: R.nearest(),
 )
 
 ReSamplerPeriods = dict(
     H='h', T='min', S='sec', L='ms', U='us', N='ns'
 )
 
 def timeseries_dataframe_resample(df:pd.DataFrame, period:str, method:str):
-
+    """
+    Resamples a time-series DataFrame on the specified period and method.
+
+    Parameters:
+        df (pd.DataFrame): The input time-series DataFrame.
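+        period (str): The resampling period. The legacy aliases H, T, S, L, U and N are translated to 'h', 'min', 'sec', 'ms', 'us' and 'ns'; any other value is passed to `resample` as a string.
+        method (str): The resampling method. Can be a string of multiple methods separated by ';'.
+        Note: `method_args` is not a parameter of the current signature, which takes only df, period and method.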
+
+    Returns:
+        pd.DataFrame: The resampled DataFrame.
+    """
     sampler = df.resample(ReSamplerPeriods.get(period, str(period)))
 
     dataframes = [df]