Skip to content

Commit 7bdaaff

Browse files
committed
Add path statistics
1 parent 73a8db0 commit 7bdaaff

File tree

8 files changed

+98
-89
lines changed

8 files changed

+98
-89
lines changed

notebooks/applications/hurst.md

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,32 +17,49 @@ The [Hurst exponent](https://en.wikipedia.org/wiki/Hurst_exponent) is used as a
1717

1818
It is a statistic which can be used to test whether a time series is mean reverting or trending.
1919

20-
```{code-cell} ipython3
21-
from quantflow.sp.cir import CIR
20+
+++
2221

23-
p = CIR(kappa=1, sigma=1)
24-
```
22+
## Study the Wiener process OHLC
2523

26-
## Study the Wiener process OHLC
24+
We want to construct a mechanism to estimate the Hurst exponent via OHLC data.
25+
In order to evaluate results against a known solution, we take the Wiener process as the generator of the time series. In this way we know exactly what the variance should be.
2726

2827
```{code-cell} ipython3
2928
from quantflow.sp.weiner import WeinerProcess
29+
from quantflow.utils.dates import start_of_day
3030
p = WeinerProcess(sigma=0.5)
31-
paths = p.sample(1, 1, 1000)
32-
df = paths.as_datetime_df().reset_index()
31+
paths = p.sample(1, 1, 24*60*60)
32+
paths.plot()
33+
```
34+
35+
```{code-cell} ipython3
36+
df = paths.as_datetime_df(start=start_of_day()).reset_index()
3337
df
3438
```
3539

40+
At this point we estimate the standard deviation using the **realized variance** along the path (we use the `scaled` flag so that the standard deviation is scaled by the square root of the time step).
41+
42+
```{code-cell} ipython3
43+
float(paths.path_std(scaled=True)[0])
44+
```
45+
3646
```{code-cell} ipython3
3747
from quantflow.ta.ohlc import OHLC
48+
from dataclasses import replace
3849
from datetime import timedelta
3950
ohlc = OHLC(serie="0", period="10m", rogers_satchell_variance=True, parkinson_variance=True, garman_klass_variance=True)
40-
result = ohlc(df)
41-
result
51+
ohlc(df)
4252
```
4353

4454
```{code-cell} ipython3
45-
55+
import pandas as pd
56+
results = []
57+
for period in ("2m", "5m", "10m", "30m", "1h", "4h"):
58+
operator = ohlc.model_copy(update=dict(period=period))
59+
result = operator(df).sum()
60+
results.append(dict(period=period, pk=result["0_pk"].item(), gk=result["0_gk"].item(), rs=result["0_rs"].item()))
61+
vdf = pd.DataFrame(results)
62+
vdf
4663
```
4764

4865
# Links
@@ -66,6 +83,10 @@ from quantflow.utils.dates import utcnow
6683
pd.date_range(start=utcnow(), periods=10, freq="0.5S")
6784
```
6885

86+
```{code-cell} ipython3
87+
7*7+3*3
88+
```
89+
6990
```{code-cell} ipython3
7091
7192
```

quantflow/data/vault.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33

44
class Vault:
5+
"""Keeps key-value pairs in a file."""
56

67
def __init__(self, path: str | Path) -> None:
78
self.path = Path(path)
@@ -17,22 +18,27 @@ def load(self) -> dict[str, str]:
1718
return data
1819

1920
def add(self, key: str, value: str) -> None:
21+
"""Add a key-value pair to the vault."""
2022
self.data[key] = value
2123
self.save()
2224

2325
def delete(self, key: str) -> bool:
26+
"""Delete a key-value pair from the vault."""
2427
if self.data.pop(key, None) is not None:
2528
self.save()
2629
return True
2730
return False
2831

2932
def get(self, key: str) -> str | None:
33+
"""Get the value of a key if available otherwise None."""
3034
return self.data.get(key)
3135

3236
def keys(self) -> list[str]:
37+
"""Get the keys in the vault."""
3338
return sorted(self.data)
3439

3540
def save(self) -> None:
41+
"""Save the data to the file."""
3642
with open(self.path, "w") as file:
3743
for key in sorted(self.data):
3844
value = self.data[key]

quantflow/ta/base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
DataFrame: TypeAlias = pl.DataFrame | pd.DataFrame
77

88

9-
def to_polars(df: DataFrame) -> pl.DataFrame:
9+
def to_polars(df: DataFrame, *, copy: bool = False) -> pl.DataFrame:
1010
if isinstance(df, pd.DataFrame):
1111
return pl.DataFrame(df)
12+
elif copy:
13+
return df.clone()
1214
return df

quantflow/ta/ohlc.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
from dataclasses import dataclass
21
from datetime import timedelta
32

43
import numpy as np
54
import polars as pl
5+
from pydantic import BaseModel
66

77
from .base import DataFrame, to_polars
88

99

10-
@dataclass
11-
class OHLC:
10+
class OHLC(BaseModel):
1211
"""Aggregates OHLC data over a given period and serie
1312
1413
Optionally calculates the range-based variance estimators for the serie.
@@ -50,7 +49,7 @@ def close_col(self) -> pl.Expr:
5049
def __call__(self, df: DataFrame) -> pl.DataFrame:
5150
"""Returns a dataframe with OHLC data sampled over the given period"""
5251
result = (
53-
to_polars(df)
52+
to_polars(df, copy=True)
5453
.group_by_dynamic(self.index_column, every=self.period)
5554
.agg(
5655
pl.col(self.serie).first().alias(f"{self.serie}_open"),

quantflow/utils/dates.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,20 @@ def utcnow() -> datetime:
55
return datetime.now(timezone.utc)
66

77

8+
def as_utc(dt: date | None = None) -> datetime:
9+
if dt is None:
10+
return utcnow()
11+
elif isinstance(dt, datetime):
12+
return dt.astimezone(timezone.utc)
13+
else:
14+
return datetime(dt.year, dt.month, dt.day, tzinfo=timezone.utc)
15+
16+
817
def isoformat(date: str | date) -> str:
918
if isinstance(date, str):
1019
return date
1120
return date.isoformat()
21+
22+
23+
def start_of_day(dt: date | None = None) -> datetime:
24+
return as_utc(dt).replace(hour=0, minute=0, second=0, microsecond=0)

quantflow/utils/paths.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@ class Paths(BaseModel, arbitrary_types_allowed=True):
1919
"""Paths of a stochastic process"""
2020

2121
t: float = Field(description="time horizon")
22+
"""Time horizon - the unit of time is not specified"""
2223
data: FloatArray = Field(description="paths")
24+
"""Paths of the stochastic process"""
2325

2426
@property
2527
def dt(self) -> float:
28+
"""Time step"""
2629
return self.t / self.time_steps
2730

2831
@property
@@ -64,17 +67,42 @@ def dates(
6467
return pd.date_range(start=start, end=end, periods=self.time_steps + 1)
6568

6669
def mean(self) -> FloatArray:
67-
"""Mean of paths"""
70+
"""Paths cross-section mean"""
6871
return np.mean(self.data, axis=1)
6972

7073
def std(self) -> FloatArray:
71-
"""Standard deviation of paths"""
74+
"""Paths cross-section standard deviation"""
7275
return np.std(self.data, axis=1)
7376

7477
def var(self) -> FloatArray:
75-
"""Variance of paths"""
78+
"""Paths cross-section variance"""
7679
return np.var(self.data, axis=1)
7780

81+
def paths_mean(self, *, scaled: bool = False) -> FloatArray:
82+
"""mean for each path
83+
84+
If scaled is True, the mean is scaled by the time step
85+
"""
86+
scale = self.dt if scaled else 1.0
87+
return np.mean(self.data, axis=0) / scale
88+
89+
def path_std(self, *, scaled: bool = False) -> FloatArray:
90+
"""standard deviation for each path
91+
92+
If scaled is True, the standard deviation is scaled by the square
93+
root of the time step
94+
"""
95+
scale = np.sqrt(self.dt) if scaled else 1.0
96+
return np.std(np.diff(self.data, axis=0), axis=0) / scale
97+
98+
def path_var(self, *, scaled: bool = False) -> FloatArray:
99+
"""variance for each path
100+
101+
If scaled is True, the variance is scaled by the time step
102+
"""
103+
scale = self.dt if scaled else 1.0
104+
return np.var(np.diff(self.data, axis=0), axis=0) / scale
105+
78106
def as_datetime_df(
79107
self, *, start: datetime | None = None, unit: str = "d"
80108
) -> pd.DataFrame:

quantflow/utils/volatility.py

Lines changed: 0 additions & 71 deletions
This file was deleted.

quantflow_tests/test_utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from quantflow.utils.numbers import round_to_step, to_decimal
2+
3+
4+
def test_round_to_step():
5+
assert str(round_to_step(1.234, 0.1)) == "1.2"
6+
assert str(round_to_step(1.234, 0.01)) == "1.23"
7+
assert str(round_to_step(1.236, 0.01)) == "1.24"
8+
assert str(round_to_step(1.1, 0.01)) == "1.10"
9+
assert str(round_to_step(1.1, 0.001)) == "1.100"
10+
assert str(round_to_step(2, 0.001)) == "2.000"
11+
assert str(round_to_step(to_decimal("2.00000000000"), 0.001)) == "2.000"

0 commit comments

Comments
 (0)