Files
PyPortfolioOpt/pypfopt/risk_models.py
2018-08-26 14:05:27 +01:00

217 lines
7.8 KiB
Python

"""
The ``risk_models`` module provides functions for estimating the covariance matrix given
historical returns. Because of the complexity of estimating covariance matrices
(and the importance of efficient computations), this module mostly provides a convenient
wrapper around the underrated `sklearn.covariance` module.
The format of the data input is the same as that in :ref:`expected-returns`.
**Currently implemented:**
- sample covariance
- semicovariance
- exponentially weighted covariance
- minimum covariance determinant
- shrunk covariance matrices:

    - manual shrinkage
    - Ledoit Wolf shrinkage
    - Oracle Approximating shrinkage
"""
import warnings
import numpy as np
import pandas as pd
from sklearn import covariance
def sample_cov(prices, frequency=252):
    """
    Compute the annualised sample covariance matrix of (daily) asset returns.

    Percentage returns are derived from consecutive rows of ``prices``, rows in
    which every return is missing are discarded, and the sample covariance of
    the remaining returns is scaled by ``frequency``.

    :param prices: adjusted closing prices of the asset, each row is a date
                   and each column is a ticker/id. Anything that is not a
                   dataframe is converted to one (with a ``RuntimeWarning``).
    :type prices: pd.DataFrame
    :param frequency: number of time periods in a year, defaults to 252 (the number
                      of trading days in a year)
    :type frequency: int, optional
    :return: annualised sample covariance matrix
    :rtype: pd.DataFrame
    """
    if isinstance(prices, pd.DataFrame):
        price_frame = prices
    else:
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        price_frame = pd.DataFrame(prices)

    returns = price_frame.pct_change()
    # The first row is always all-NaN after pct_change, so it is removed here.
    returns = returns.dropna(how="all")
    daily_cov = returns.cov()
    return daily_cov * frequency
def semicovariance(prices, benchmark=0, frequency=252):
    """
    Estimate the semicovariance matrix, i.e. the covariance given that
    the returns are less than the benchmark.

    .. semicov = E([min(r_i - B, 0)] . [min(r_j - B, 0)])

    The implementation truncates the benchmark-relative returns at zero and
    then takes the pandas sample covariance of those truncated returns (so the
    column means of the truncated returns are subtracted), scaled by ``frequency``.

    :param prices: adjusted closing prices of the asset, each row is a date
                   and each column is a ticker/id. Anything that is not a
                   dataframe is converted to one (with a ``RuntimeWarning``).
    :type prices: pd.DataFrame
    :param benchmark: the benchmark return, subtracted from each period's
                      return before truncation, defaults to 0.
    :type benchmark: float
    :param frequency: number of time periods in a year, defaults to 252 (the number
                      of trading days in a year)
    :type frequency: int, optional
    :return: semicovariance matrix
    :rtype: pd.DataFrame
    """
    if isinstance(prices, pd.DataFrame):
        price_frame = prices
    else:
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        price_frame = pd.DataFrame(prices)

    returns = price_frame.pct_change().dropna(how="all")
    excess = returns - benchmark
    # Keep only the shortfall below the benchmark; anything above it becomes 0.
    downside = np.fmin(excess, 0)
    return downside.cov() * frequency
def _pair_exp_cov(X, Y, alpha=0.5):
    """
    Calculate the exponentially weighted covariance between two series of returns.

    The covariation series ``(X - mean(X)) * (Y - mean(Y))`` is smoothed with an
    exponentially weighted moving average, and the most recent smoothed value
    is returned.

    :param X: first series of returns
    :type X: pd.Series
    :param Y: second series of returns
    :type Y: pd.Series
    :param alpha: the span of the exponential weighting, expressed as a fraction
                  of the number of non-null observations ``T``; defaults to 0.5
                  (the recommended value). Higher alpha tends towards the simple
                  mean, while ``alpha * T = 1`` sets the mean equal to the last
                  result.
    :type alpha: float, optional
    :return: the final value of the exponentially weighted covariation
    :rtype: float
    """
    covariation = (X - X.mean()) * (Y - Y.mean())
    # Number of usable observations; the span is defined relative to this.
    T = covariation.notnull().sum()
    smoothed = covariation.ewm(span=alpha * T).mean()
    # Take the last element by position. Plain ``[-1]`` is a label lookup, which
    # raises KeyError on an integer index and depends on a deprecated positional
    # fallback for other index types.
    return smoothed.iloc[-1]
def exp_cov(prices, alpha=0.5, frequency=252):
    """
    Estimate the annualised exponentially weighted covariance matrix of asset returns.

    Every entry of the matrix is obtained by calling ``_pair_exp_cov`` on the
    corresponding pair of return columns; the resulting matrix is then scaled
    by ``frequency``.

    :param prices: adjusted closing prices of the asset, each row is a date
                   and each column is a ticker/id. Anything that is not a
                   dataframe is converted to one (with a ``RuntimeWarning``).
    :type prices: pd.DataFrame
    :param alpha: weighting parameter forwarded unchanged to ``_pair_exp_cov``,
                  defaults to 0.5
    :type alpha: float, optional
    :param frequency: number of time periods in a year, defaults to 252 (the number
                      of trading days in a year)
    :type frequency: int, optional
    :return: annualised estimate of the exponentially weighted covariance matrix
    :rtype: pd.DataFrame
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        prices = pd.DataFrame(prices)

    tickers = prices.columns
    returns = prices.pct_change().dropna(how="all")
    n_assets = len(tickers)
    matrix = np.zeros((n_assets, n_assets))

    # Only the upper triangle (diagonal included) is computed; each value is
    # mirrored into the lower triangle, so the matrix is symmetric.
    for row, col in zip(*np.triu_indices(n_assets)):
        pair_cov = _pair_exp_cov(returns.iloc[:, row], returns.iloc[:, col], alpha)
        matrix[row, col] = pair_cov
        matrix[col, row] = pair_cov

    return pd.DataFrame(matrix * frequency, columns=tickers, index=tickers)
def min_cov_determinant(prices, frequency=252, random_state=None):
    """
    Calculate the minimum covariance determinant, an estimator of the covariance matrix
    that is more robust to noise.

    Missing returns are replaced via ``np.nan_to_num`` before the data is handed
    to ``sklearn.covariance.fast_mcd``.

    :param prices: adjusted closing prices of the asset, each row is a date
                   and each column is a ticker/id. Anything that is not a
                   dataframe is converted to one (with a ``RuntimeWarning``).
    :type prices: pd.DataFrame
    :param frequency: number of time periods in a year, defaults to 252 (the number
                      of trading days in a year)
    :type frequency: int, optional
    :param random_state: random seed to make results reproducible, defaults to None
    :type random_state: int, optional
    :return: annualised estimate of covariance matrix
    :rtype: pd.DataFrame
    """
    if isinstance(prices, pd.DataFrame):
        price_frame = prices
    else:
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        price_frame = pd.DataFrame(prices)

    tickers = price_frame.columns
    returns = price_frame.pct_change().dropna(how="all")
    cleaned = np.nan_to_num(returns.values)

    mcd_result = covariance.fast_mcd(cleaned, random_state=random_state)
    # Element 1 of the fast_mcd result is used as the raw covariance estimate.
    raw_cov_array = mcd_result[1]
    return pd.DataFrame(raw_cov_array, index=tickers, columns=tickers) * frequency
class CovarianceShrinkage:
    """
    Provide methods for computing shrinkage estimates of the covariance matrix, using the
    sample covariance matrix and choosing the structured estimator to be an identity matrix
    multiplied by the average sample variance. The shrinkage constant can be input manually,
    though there exist methods (notably Ledoit Wolf) to estimate the optimal value.

    Instance variables:

    - ``X`` (returns)
    - ``S`` (sample covariance matrix)
    - ``delta`` (shrinkage constant)
    - ``frequency`` (annualisation factor)
    """

    def __init__(self, prices, frequency=252):
        """
        :param prices: adjusted closing prices of the asset, each row is a date and
                       each column is a ticker/id. Anything that is not a dataframe
                       is converted to one (with a ``RuntimeWarning``).
        :type prices: pd.DataFrame
        :param frequency: number of time periods in a year, defaults to 252 (the
                          number of trading days in a year)
        :type frequency: int, optional
        """
        if isinstance(prices, pd.DataFrame):
            price_frame = prices
        else:
            warnings.warn("prices are not in a dataframe", RuntimeWarning)
            price_frame = pd.DataFrame(prices)

        self.frequency = frequency
        returns = price_frame.pct_change().dropna(how="all")
        self.X = returns
        # Sample covariance is stored as a plain array (not annualised).
        self.S = returns.cov().values
        # Shrinkage constant; populated by whichever shrinkage method is called.
        self.delta = None

    def format_and_annualise(self, raw_cov_array):
        """
        Helper method which annualises the output of shrinkage calculations,
        and formats the result into a dataframe labelled by the asset names.

        :param raw_cov_array: raw covariance matrix of daily returns
        :type raw_cov_array: np.ndarray
        :return: annualised covariance matrix
        :rtype: pd.DataFrame
        """
        tickers = self.X.columns
        labelled = pd.DataFrame(raw_cov_array, index=tickers, columns=tickers)
        return labelled * self.frequency

    def shrunk_covariance(self, delta=0.2):
        """
        Shrink a sample covariance matrix to the identity matrix (scaled by the average
        sample variance). This method does not estimate an optimal shrinkage parameter,
        it requires manual input. The value used is stored in ``self.delta``.

        :param delta: shrinkage parameter, defaults to 0.2.
        :type delta: float, optional
        :return: annualised shrunk sample covariance matrix
        :rtype: pd.DataFrame
        """
        self.delta = delta
        sample = self.S
        n_assets = sample.shape[1]

        # Shrinkage target: identity scaled by the mean of the sample variances.
        mean_variance = np.trace(sample) / n_assets
        target = np.identity(n_assets) * mean_variance

        # Convex combination of the target and the sample covariance.
        blended = delta * target + (1 - delta) * sample
        return self.format_and_annualise(blended)

    def ledoit_wolf(self):
        """
        Calculate the Ledoit-Wolf shrinkage estimate. The shrinkage constant
        reported by the estimator is stored in ``self.delta``.

        :return: annualised shrunk sample covariance matrix
        :rtype: pd.DataFrame
        """
        # Missing returns are replaced via np.nan_to_num before estimation.
        cleaned = np.nan_to_num(self.X.values)
        estimate, shrinkage = covariance.ledoit_wolf(cleaned)
        self.delta = shrinkage
        return self.format_and_annualise(estimate)

    def oracle_approximating(self):
        """
        Calculate the Oracle Approximating Shrinkage estimate. The shrinkage
        constant reported by the estimator is stored in ``self.delta``.

        :return: annualised shrunk sample covariance matrix
        :rtype: pd.DataFrame
        """
        # Missing returns are replaced via np.nan_to_num before estimation.
        cleaned = np.nan_to_num(self.X.values)
        estimate, shrinkage = covariance.oas(cleaned)
        self.delta = shrinkage
        return self.format_and_annualise(estimate)