Skip to content

Add Support for Fama-French 3/5 Factor Model to Expected Return Module #634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
DSM2499 opened this issue May 16, 2025 · 2 comments
Open
Labels
enhancement New feature or request

Comments

@DSM2499
Copy link

DSM2499 commented May 16, 2025

Enhance the expected_returns module by adding a new function ff_expected_return() that computes expected returns using the Fama-French 3-factor and 5-factor models via OLS regression.

This will expand the current set of return estimation methods (mean_historical_return, capm_return, etc.) with a more advanced and academically grounded model, improving flexibility for users in quantitative finance applications.

def ff_expected_return(
prices: pd.DataFrame,
factor_data: pd.DataFrame,
returns_data: bool = False,
model: str = "ff3",
compounding: bool = True,
frequency: int = 252,
log_returns: bool = False,
) -> pd.Series:

@DSM2499 DSM2499 added the enhancement New feature or request label May 16, 2025
@DSM2499
Copy link
Author

DSM2499 commented May 16, 2025

def ff_expected_return(
    prices: pd.DataFrame,
    factor_data: pd.DataFrame,
    returns_data: bool = False,
    model: str = "ff3",
    compounding: bool = True,
    frequency: int = 252,
    log_returns: bool = False,
) -> pd.Series:
    """
    Estimate annualised expected returns with a Fama-French 3- or 5-factor
    OLS regression fitted separately for each asset.

    Each asset's returns in excess of ``RF`` are regressed (with an intercept)
    on the selected factors over the dates shared by both inputs, and the
    in-sample fitted values are annualised. The result is therefore an
    *excess* return: ``RF`` is subtracted before fitting and not added back.

    NOTE(review): with an intercept, OLS fitted values average to the sample
    mean of the dependent variable, so with ``compounding=False`` the result
    equals the annualised mean excess return. Confirm this is the intended
    estimator rather than beta-times-factor-premia.

    :param prices: asset prices, or returns if ``returns_data=True``.
                   Non-DataFrame input is converted with a warning.
    :type prices: pd.DataFrame
    :param factor_data: Fama-French factors indexed like ``prices``.
                        Must include 'RF' and:
                        - ff3: 'Mkt-RF', 'SMB', 'HML'
                        - ff5: also 'RMW', 'CMA'
                        Non-DataFrame input is converted with a warning.
    :type factor_data: pd.DataFrame
    :param returns_data: if True, ``prices`` is treated as returns data.
    :type returns_data: bool
    :param model: 'ff3' or 'ff5'.
    :type model: str
    :param compounding: geometric annualisation of the fitted values if True,
                        arithmetic mean times ``frequency`` if False.
    :type compounding: bool
    :param frequency: annualisation factor (periods per year).
    :type frequency: int
    :param log_returns: forwarded to ``returns_from_prices`` when returns are
                        derived from prices; ignored if ``returns_data=True``.
    :type log_returns: bool
    :raises ValueError: if ``model`` is not 'ff3' or 'ff5', if a required
                        factor column is missing, or if the two inputs share
                        no index labels.
    :return: expected (excess) return per asset, indexed by asset name.
    :rtype: pd.Series
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("Input prices are not in a dataframe", RuntimeWarning)
        prices = pd.DataFrame(prices)

    if not isinstance(factor_data, pd.DataFrame):
        warnings.warn("Input factor_data is not in a dataframe", RuntimeWarning)
        # Convert as well as warn, otherwise the column check below fails on
        # inputs (e.g. a dict) that have no ``.columns`` attribute.
        factor_data = pd.DataFrame(factor_data)

    regressors_by_model = {
        "ff3": ["Mkt-RF", "SMB", "HML"],
        "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
    }
    # Reject unknown model names instead of silently running the 3-factor fit.
    if model not in regressors_by_model:
        raise ValueError(f"model must be 'ff3' or 'ff5', got {model!r}")
    regressors = regressors_by_model[model]

    # Report every missing column at once rather than only the first.
    missing = [col for col in ["RF", *regressors] if col not in factor_data.columns]
    if missing:
        raise ValueError(f"Missing required factor: {', '.join(missing)}")

    # Compute asset returns
    if returns_data:
        returns = prices.copy()
    else:
        returns = returns_from_prices(prices, log_returns)

    _check_returns(returns)

    # Restrict both inputs to the dates they have in common
    common_index = returns.index.intersection(factor_data.index)
    if common_index.empty:
        raise ValueError("No overlapping dates between returns and factor data")

    returns = returns.loc[common_index]
    factors = factor_data.loc[common_index]

    # Regress excess returns on the factors plus an intercept
    excess_returns = returns.sub(factors["RF"], axis=0)
    X = sm.add_constant(factors[regressors])

    expected = {}
    for asset in excess_returns.columns:
        # Named ``fit`` so the ``model`` argument is not rebound to a
        # regression result inside the loop.
        fit = sm.OLS(excess_returns[asset], X).fit()
        predicted = fit.predict(X)
        if compounding:
            growth = (1 + predicted).prod()
            expected[asset] = growth ** (frequency / len(predicted)) - 1
        else:
            expected[asset] = predicted.mean() * frequency

    return pd.Series(expected)

@DSM2499
Copy link
Author

DSM2499 commented May 16, 2025

Tests for the proposed enhancement:

def test_ff3_expected_return_valid():
    """ff3 happy path: one non-null expected return per asset column."""
    df = get_data().iloc[:100, :3]  # limit assets and rows for speed
    dates = df.index
    n_obs = len(dates)  # size the mock factors from the data, not a constant
    rng = np.random.default_rng(0)  # fixed seed keeps the fixture reproducible

    factors = pd.DataFrame(
        {
            "RF": rng.normal(0.0001, 0.00001, size=n_obs),
            "Mkt-RF": rng.normal(0.0005, 0.001, size=n_obs),
            "SMB": rng.normal(0.0002, 0.0005, size=n_obs),
            "HML": rng.normal(0.0001, 0.0004, size=n_obs),
        },
        index=dates,
    )

    mu = expected_returns.ff_expected_return(df, factors, model="ff3")
    assert isinstance(mu, pd.Series)
    assert mu.shape[0] == df.shape[1]
    assert mu.notnull().all()

def test_ff5_expected_return_valid():
    """ff5 happy path: one non-null expected return per asset column."""
    df = get_data().iloc[:100, :3]
    dates = df.index
    n_obs = len(dates)  # size the mock factors from the data, not a constant
    rng = np.random.default_rng(0)  # fixed seed keeps the fixture reproducible

    # Generate mock Fama-French 5-factor data
    factors = pd.DataFrame(
        {
            "RF": rng.normal(0.0001, 0.00001, size=n_obs),
            "Mkt-RF": rng.normal(0.0005, 0.001, size=n_obs),
            "SMB": rng.normal(0.0002, 0.0005, size=n_obs),
            "HML": rng.normal(0.0001, 0.0004, size=n_obs),
            "RMW": rng.normal(0.0001, 0.0003, size=n_obs),
            "CMA": rng.normal(0.0001, 0.0003, size=n_obs),
        },
        index=dates,
    )

    mu = expected_returns.ff_expected_return(df, factors, model="ff5")
    assert isinstance(mu, pd.Series)
    assert mu.shape[0] == df.shape[1]
    assert mu.notnull().all()

def test_ff_expected_return_missing_factors():
    """A factor table lacking 'HML' must be rejected with a ValueError."""
    asset_prices = get_data().iloc[:100, :3]

    # Three of the four ff3 columns only: 'HML' is deliberately left out.
    incomplete = {
        "RF": np.random.normal(0.0001, 0.00001, size=100),
        "Mkt-RF": np.random.normal(0.0005, 0.001, size=100),
        "SMB": np.random.normal(0.0002, 0.0005, size=100),
    }
    factor_table = pd.DataFrame(incomplete, index=asset_prices.index)

    with pytest.raises(ValueError) as excinfo:
        expected_returns.ff_expected_return(asset_prices, factor_table, model="ff3")

    message = str(excinfo.value)
    assert "Missing required factor: HML" in message

def test_ff_expected_return_no_overlap():
    """Factors indexed on dates absent from the prices must raise ValueError."""
    asset_prices = get_data().iloc[:100, :3]

    # Factor index starts in 2020 so that it shares no dates with the prices.
    shifted_dates = pd.date_range("2020-01-01", periods=100, freq="B")
    factor_columns = {
        "RF": np.random.normal(0.0001, 0.00001, size=100),
        "Mkt-RF": np.random.normal(0.0005, 0.001, size=100),
        "SMB": np.random.normal(0.0002, 0.0005, size=100),
        "HML": np.random.normal(0.0001, 0.0004, size=100),
    }
    factor_table = pd.DataFrame(factor_columns, index=shifted_dates)

    with pytest.raises(ValueError) as excinfo:
        expected_returns.ff_expected_return(asset_prices, factor_table, model="ff3")

    message = str(excinfo.value)
    assert "No overlapping dates" in message

def test_ff_expected_return_compounding_toggle():
    """Geometric and arithmetic annualisation must give different results."""
    df = get_data().iloc[:100, :3]
    dates = df.index
    n_obs = len(dates)  # size the mock factors from the data, not a constant
    rng = np.random.default_rng(0)  # fixed seed keeps the fixture reproducible

    factors = pd.DataFrame(
        {
            "RF": rng.normal(0.0001, 0.00001, size=n_obs),
            "Mkt-RF": rng.normal(0.0005, 0.001, size=n_obs),
            "SMB": rng.normal(0.0002, 0.0005, size=n_obs),
            "HML": rng.normal(0.0001, 0.0004, size=n_obs),
        },
        index=dates,
    )

    mu_geom = expected_returns.ff_expected_return(df, factors, compounding=True)
    mu_arith = expected_returns.ff_expected_return(df, factors, compounding=False)
    assert not mu_geom.equals(mu_arith)

def test_ff3_expected_return_static():
    """ff3 on a tiny hand-written fixture: shape, labels and loose bounds."""
    asset_names = ["Asset A", "Asset B", "Asset C"]
    dates = pd.date_range("2022-01-01", periods=5, freq="D")

    # Five daily prices for each of the three assets
    price_rows = {
        "Asset A": [100, 100.5, 101, 101.5, 102],
        "Asset B": [50, 50.25, 50.5, 50.75, 51],
        "Asset C": [200, 199.5, 199.8, 200.1, 200.5],
    }
    prices = pd.DataFrame(price_rows, index=dates)

    # FF3 factors on the same dates
    factor_rows = {
        "RF": [0.0001] * 5,
        "Mkt-RF": [0.001, 0.0015, -0.0005, 0.0003, 0.0007],
        "SMB": [0.0002, 0.0001, -0.0001, 0.0002, 0.0001],
        "HML": [0.0003, -0.0002, 0.0004, 0.0001, 0.0],
    }
    factors = pd.DataFrame(factor_rows, index=dates)

    mu = expected_returns.ff_expected_return(
        prices,
        factors,
        model="ff3",
        compounding=False,
        frequency=252,
    )

    assert isinstance(mu, pd.Series)
    assert len(mu) == 3
    assert mu.index.tolist() == asset_names
    assert not mu.isnull().any()
    # Loose sanity bounds only; no exact values are pinned.
    assert -0.5 < mu.min()
    assert mu.max() < 10

def test_ff5_expected_return_static():
    """ff5 on a tiny hand-written fixture: shape, labels and loose bounds."""
    asset_names = ["Asset A", "Asset B", "Asset C"]
    dates = pd.date_range("2022-01-01", periods=5, freq="D")

    # Five daily prices for each of the three assets
    price_rows = {
        "Asset A": [100, 100.5, 101, 101.5, 102],
        "Asset B": [50, 50.25, 50.5, 50.75, 51],
        "Asset C": [200, 199.5, 199.8, 200.1, 200.5],
    }
    prices = pd.DataFrame(price_rows, index=dates)

    # FF5 factors on the same dates
    factor_rows = {
        "RF": [0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
        "Mkt-RF": [0.001, 0.0015, -0.0005, 0.0003, 0.0007],
        "SMB": [0.0002, 0.0001, -0.0001, 0.0002, 0.0001],
        "HML": [0.0003, -0.0002, 0.0004, 0.0001, 0.0],
        "RMW": [0.0002, 0.0001, 0.0003, -0.0001, 0.0002],
        "CMA": [0.0003, -0.0001, 0.0001, 0.0002, 0.0003],
    }
    factors = pd.DataFrame(factor_rows, index=dates)

    mu = expected_returns.ff_expected_return(
        prices,
        factors,
        model="ff5",
        compounding=False,
        frequency=252,
    )

    assert isinstance(mu, pd.Series)
    assert len(mu) == 3
    assert mu.index.tolist() == asset_names
    assert not mu.isnull().any()
    # Loose sanity bounds only; no exact values are pinned.
    assert -0.5 < mu.min()
    assert mu.max() < 10

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

No branches or pull requests

1 participant