Mirror of https://github.com/kernc/backtesting.py.git (synced 2024-01-28 15:29:30 +03:00)
ENH: Model-based optimization and randomized grid search (#154)
* initial commit
* initial commit
* added optimize_skopt to backtesting
* _optimize_skopt refactor
* made dimensions dynamic
* added unit test; apply PR comments
* added heatmap to skopt and update unit tests
* removed eggs folder
* remove egg folder
* fixed gitignore
* add scikit-optimize dependency for test
* comment out pickle TRUE
* fixed flake8 errors
* added skopt to Parameter Heatmap notebook
* Revert unwanted changes
* Fixup .gitignore
* Reword docstring
* Refactor Backtest.optimize() code
* make Backtest.optimize() arguments kw-only
* add random_state for reproducible results
* ensure function arguments consistency
* ensure all kwargs have values
* make scikit-optimize package optional
* cast timedelta/datetime dimensions to int
* cache objective_function evaluations (avoid warning)
* ensure param combo matches constraint=
* adjust skopt.forest_minimize() params
* return ordering: stats, heatmap, optimize_result
* clean heatmap and optimize_result
* Make max_tries for method=grid be randomized search
* Update example notebook
* doc/build.sh: unescape URLs
* mypy happy
* minor restyle
* fix typo
* Add changelog entry

Co-authored-by: Tony Freeman <tfreeman@approachci.com>
Co-authored-by: Kernc <kerncece@gmail.com>
.gitignore (vendored, 4 changes)
```diff
@@ -15,5 +15,9 @@ htmlcov/*
 doc/build/*
 .idea/*
+.vscode/
 **/.ipynb_checkpoints
 *~*
+.venv/
```
CHANGELOG.md

```diff
@@ -6,6 +6,8 @@ These were the major changes contributing to each release:

 ### 0.x.x

+* Faster [model-based optimization](https://kernc.github.io/backtesting.py/doc/examples/Parameter%20Heatmap%20&%20Optimization.html#Model-based%20optimization) using scikit-optimize (#154)
+* Optionally faster [optimization](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.optimize) by randomized grid search (#154)
 * _Annualized_ Return/Volatility/Sharpe/Sortino/Calmar stats (#156)
 * Auto close open trades on backtest finish
 * Add `Backtest.plot(plot_return=)`, akin to `plot_equity=`
```
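For orientation, the two new changelog entries map onto `Backtest.optimize()` roughly like this. A minimal sketch: the `SmaCross` strategy and the parameter ranges are illustrative stand-ins, not part of this commit; `GOOG` and `SMA` are the sample data and indicator shipped with the package.

```python
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
from backtesting.test import GOOG, SMA  # sample data and SMA indicator shipped with the package


class SmaCross(Strategy):
    # Illustrative two-parameter strategy; any Strategy subclass works the same way.
    fast = 10
    slow = 30

    def init(self):
        self.sma_fast = self.I(SMA, self.data.Close, self.fast)
        self.sma_slow = self.I(SMA, self.data.Close, self.slow)

    def next(self):
        if crossover(self.sma_fast, self.sma_slow):
            self.buy()
        elif crossover(self.sma_slow, self.sma_fast):
            self.sell()


bt = Backtest(GOOG, SmaCross)

# Randomized grid search: evaluate only ~30% of the parameter grid.
stats = bt.optimize(fast=range(5, 30, 5), slow=range(10, 70, 10),
                    constraint=lambda p: p.fast < p.slow,
                    max_tries=.3, random_state=0)

# Model-based optimization (requires `pip install scikit-optimize`).
stats = bt.optimize(fast=range(5, 30, 5), slow=range(10, 70, 10),
                    constraint=lambda p: p.fast < p.slow,
                    method='skopt', max_tries=60, random_state=0)
```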
backtesting/__init__.py

```diff
@@ -7,7 +7,7 @@

 * [Library of Utilities and Composable Base Strategies](../examples/Strategies Library.html)
 * [Multiple Time Frames](../examples/Multiple Time Frames.html)
-* [Parameter Heatmap](../examples/Parameter Heatmap.html)
+* [**Parameter Heatmap & Optimization**](../examples/Parameter Heatmap & Optimization.html)
 * [Trading with Machine Learning](../examples/Trading with Machine Learning.html)

 These tutorials are also available as live Jupyter notebooks:
```
backtesting/backtesting.py

```diff
@@ -12,8 +12,8 @@ import warnings
 from abc import abstractmethod, ABCMeta
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from copy import copy
-from functools import partial
-from itertools import repeat, product, chain
+from functools import lru_cache, partial
+from itertools import repeat, product, chain, compress
 from math import copysign
 from numbers import Number
 from typing import Callable, Dict, List, Optional, Sequence, Tuple, Type, Union
```
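The two widened imports serve the new skopt code path further down: `lru_cache` memoizes repeated backtest runs, and `itertools.compress` filters invalid evaluations out of the optimization result. Both are stdlib; a quick illustration of their behavior:

```python
from functools import lru_cache
from itertools import compress

# compress() keeps items of the first iterable where the selector is truthy.
assert list(compress(['a', 'b', 'c'], [True, False, True])) == ['a', 'c']

@lru_cache()
def expensive(x):
    # Each distinct argument is computed once; repeats hit the cache,
    # which is how optimize() avoids re-running identical backtests.
    return x * x

expensive(3); expensive(3)
assert expensive.cache_info().hits == 1
```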
```diff
@@ -1176,15 +1176,20 @@ class Backtest:
         self._results = self._compute_stats(broker, strategy)
         return self._results

-    def optimize(self,
+    def optimize(self, *,
                  maximize: Union[str, Callable[[pd.Series], float]] = 'SQN',
+                 method: str = 'grid',
+                 max_tries: Union[int, float] = None,
                  constraint: Callable[[dict], bool] = None,
                  return_heatmap: bool = False,
-                 **kwargs) -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
+                 return_optimization: bool = False,
+                 random_state: int = None,
+                 **kwargs) -> Union[pd.Series,
+                                    Tuple[pd.Series, pd.Series],
+                                    Tuple[pd.Series, pd.Series, dict]]:
         """
-        Optimize strategy parameters to an optimal combination using
-        parallel exhaustive search. Returns result `pd.Series` of
-        the best run.
+        Optimize strategy parameters to an optimal combination.
+        Returns result `pd.Series` of the best run.

         `maximize` is a string key from the
         `backtesting.backtesting.Backtest.run`-returned results series,
```
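The bare `*` added to the signature makes every option keyword-only, so positional calls now fail fast. A sketch, reusing the illustrative `bt` from the example after the changelog:

```python
# Previously, bt.optimize('Equity Final [$]') would bind positionally;
# now every option must be named:
try:
    bt.optimize('Equity Final [$]')  # positional -> TypeError
except TypeError:
    pass

stats = bt.optimize(maximize='Equity Final [$]',  # keyword-only: OK
                    fast=[5, 10], slow=[20, 30])
```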
```diff
@@ -1192,6 +1197,24 @@ class Backtest:
         the higher the better. By default, the method maximizes
         Van Tharp's [System Quality Number](https://google.com/search?q=System+Quality+Number).

+        `method` is the optimization method. Currently two methods are supported:
+
+        * `"grid"` which does an exhaustive (or randomized) search over the
+          cartesian product of parameter combinations, and
+        * `"skopt"` which finds close-to-optimal strategy parameters using
+          [model-based optimization], making at most `max_tries` evaluations.
+
+        [model-based optimization]: \
+            https://scikit-optimize.github.io/stable/auto_examples/bayesian-optimization.html
+
+        `max_tries` is the maximal number of strategy runs to perform.
+        If `method="grid"`, this results in randomized grid search.
+        If `max_tries` is a floating value between (0, 1], this sets the
+        number of runs to approximately that fraction of full grid space.
+        Alternatively, if integer, it denotes the absolute maximum number
+        of evaluations. If unspecified (default), grid search is exhaustive,
+        whereas for `method="skopt"`, `max_tries` is set to 200.
+
         `constraint` is a function that accepts a dict-like object of
         parameters (with values) and returns `True` when the combination
         is admissible to test with. By default, any parameters combination
```
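The fraction-vs-integer semantics of `max_tries` in a worked example (plain arithmetic only, not library code):

```python
# Grid: 3 sma1 values x 4 sma2 values = 12 combinations in the full grid.
full_grid = 3 * 4

max_tries = .25                        # float in (0, 1]: a *fraction* of the grid
expected_runs = max_tries * full_grid  # ~3 randomly sampled combinations

max_tries = 5                          # int: an absolute cap of 5 evaluations
# max_tries = None                     # default: exhaustive for 'grid', 200 for 'skopt'
```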
```diff
@@ -1203,6 +1226,20 @@ class Backtest:
         inspected or projected onto 2D to plot a heatmap
         (see `backtesting.lib.plot_heatmaps()`).

+        If `return_optimization` is True and `method = 'skopt'`,
+        in addition to result series (and maybe heatmap), return raw
+        [`scipy.optimize.OptimizeResult`][OptimizeResult] for further
+        inspection, e.g. with [scikit-optimize]\
+        [plotting tools].
+
+        [OptimizeResult]: \
+            https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
+        [scikit-optimize]: https://scikit-optimize.github.io
+        [plotting tools]: https://scikit-optimize.github.io/stable/modules/plots.html
+
+        If you want reproducible optimization results, set `random_state`
+        to a fixed integer or a `numpy.random.RandomState` object.
+
         Additional keyword arguments represent strategy arguments with
         list-like collections of possible values. For example, the following
         code finds and returns the "best" of the 7 admissible (of the
```
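Depending on the flags, the return value grows from a series to a tuple. A sketch of the three shapes, where `params_space` stands for any keyword parameter ranges and `bt` is the illustrative backtest from above:

```python
params_space = dict(fast=[5, 10], slow=[20, 30])

stats = bt.optimize(**params_space)                     # pd.Series only
stats, heatmap = bt.optimize(return_heatmap=True,       # + heatmap pd.Series
                             **params_space)
stats, heatmap, optimize_result = bt.optimize(          # + scipy OptimizeResult
    method='skopt', return_heatmap=True,
    return_optimization=True, **params_space)
```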
```diff
@@ -1211,10 +1248,6 @@
             backtest.optimize(sma1=[5, 10, 15], sma2=[10, 20, 40],
                               constraint=lambda p: p.sma1 < p.sma2)

-        .. TODO::
-            Add parameter `max_tries: Union[int, float] = None` which switches
-            from exhaustive grid search to random search. See notes in the source.
-
         .. TODO::
             Improve multiprocessing/parallel execution on Windows with start method 'spawn'.
         """
```
```diff
@@ -1237,6 +1270,7 @@
                              'Series) or a function that accepts result Series '
                              'and returns a number; the higher the better')

+        have_constraint = bool(constraint)
         if constraint is None:

             def constraint(_):
```
```diff
@@ -1247,6 +1281,9 @@
                     "of strategy parameters and returns a bool whether "
                     "the combination of parameters is admissible or not")

+        if return_optimization and method != 'skopt':
+            raise ValueError("return_optimization=True only valid if method='skopt'")
+
         def _tuple(x):
             return x if isinstance(x, Sequence) and not isinstance(x, str) else (x,)
```
```diff
@@ -1259,76 +1296,181 @@ class Backtest:
             def __getattr__(self, item):
                 return self[item]

-        param_combos = tuple(map(dict,  # back to dict so it pickles
-                                 filter(constraint,  # constraints applied on our fancy dict
-                                        map(AttrDict,
-                                            product(*(zip(repeat(k), _tuple(v))
-                                                      for k, v in kwargs.items()))))))
-        if not param_combos:
-            raise ValueError('No admissible parameter combinations to test')
-
-        if len(param_combos) > 300:
-            warnings.warn(f'Searching for best of {len(param_combos)} configurations.',
-                          stacklevel=2)
-
-        heatmap = pd.Series(np.nan,
-                            name=maximize_key,
-                            index=pd.MultiIndex.from_tuples([p.values() for p in param_combos],
-                                                            names=next(iter(param_combos)).keys()))
-
-        # TODO: add parameter `max_tries:Union[int, float]=None` which switches
-        # exhaustive grid search to random search. This might need to avoid
-        # returning NaNs in stats on runs with no trades to differentiate those
-        # from non-tested parameter combos in heatmap.
-
-        def _batch(seq):
-            n = np.clip(len(seq) // (os.cpu_count() or 1), 5, 300)
-            for i in range(0, len(seq), n):
-                yield seq[i:i + n]
-
-        # Save necessary objects into "global" state; pass into concurrent executor
-        # (and thus pickle) nothing but two numbers; receive nothing but numbers.
-        # With start method "fork", children processes will inherit parent address space
-        # in a copy-on-write manner, achieving better performance/RAM benefit.
-        backtest_uuid = np.random.random()
-        param_batches = list(_batch(param_combos))
-        Backtest._mp_backtests[backtest_uuid] = (self, param_batches, maximize)  # type: ignore
-        try:
-            # If multiprocessing start method is 'fork' (i.e. on POSIX), use
-            # a pool of processes to compute results in parallel.
-            # Otherwise (i.e. on Windows), sequential computation will be "faster".
-            if mp.get_start_method(allow_none=False) == 'fork':
-                with ProcessPoolExecutor() as executor:
-                    futures = [executor.submit(Backtest._mp_task, backtest_uuid, i)
-                               for i in range(len(param_batches))]
-                    for future in _tqdm(as_completed(futures), total=len(futures)):
-                        batch_index, values = future.result()
-                        for value, params in zip(values, param_batches[batch_index]):
-                            heatmap[tuple(params.values())] = value
-            else:
-                if os.name == 'posix':
-                    warnings.warn("For multiprocessing support in `Backtest.optimize()` "
-                                  "set multiprocessing start method to 'fork'.")
-                for batch_index in _tqdm(range(len(param_batches))):
-                    _, values = Backtest._mp_task(backtest_uuid, batch_index)
-                    for value, params in zip(values, param_batches[batch_index]):
-                        heatmap[tuple(params.values())] = value
-        finally:
-            del Backtest._mp_backtests[backtest_uuid]
-
-        best_params = heatmap.idxmax()
-
-        if pd.isnull(best_params):
-            # No trade was made in any of the runs. Just make a random
-            # run so we get some, if empty, results
-            self.run(**param_combos[0])  # type: ignore
-        else:
-            # Re-run best strategy so that the next .plot() call will render it
-            self.run(**dict(zip(heatmap.index.names, best_params)))
-
-        if return_heatmap:
-            return self._results, heatmap
-        return self._results
+        def _grid_size():
+            size = np.prod([len(_tuple(v)) for v in kwargs.values()])
+            if size < 10_000 and have_constraint:
+                size = sum(1 for p in product(*(zip(repeat(k), _tuple(v))
+                                                for k, v in kwargs.items()))
+                           if constraint(AttrDict(p)))
+            return size
+
+        def _optimize_grid() -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
+            rand = np.random.RandomState(random_state).random
+            grid_frac = (1 if max_tries is None else
+                         max_tries if 0 < max_tries <= 1 else
+                         max_tries / _grid_size())
+            param_combos = [dict(params)  # back to dict so it pickles
+                            for params in (AttrDict(params)
+                                           for params in product(*(zip(repeat(k), _tuple(v))
+                                                                   for k, v in kwargs.items())))
+                            if constraint(params)  # type: ignore
+                            and rand() <= grid_frac]
+            if not param_combos:
+                raise ValueError('No admissible parameter combinations to test')
+
+            if len(param_combos) > 300:
+                warnings.warn(f'Searching for best of {len(param_combos)} configurations.',
+                              stacklevel=2)
+
+            heatmap = pd.Series(np.nan,
+                                name=maximize_key,
+                                index=pd.MultiIndex.from_tuples(
+                                    [p.values() for p in param_combos],
+                                    names=next(iter(param_combos)).keys()))
+
+            def _batch(seq):
+                n = np.clip(len(seq) // (os.cpu_count() or 1), 5, 300)
+                for i in range(0, len(seq), n):
+                    yield seq[i:i + n]
+
+            # Save necessary objects into "global" state; pass into concurrent executor
+            # (and thus pickle) nothing but two numbers; receive nothing but numbers.
+            # With start method "fork", children processes will inherit parent address space
+            # in a copy-on-write manner, achieving better performance/RAM benefit.
+            backtest_uuid = np.random.random()
+            param_batches = list(_batch(param_combos))
+            Backtest._mp_backtests[backtest_uuid] = (self, param_batches, maximize)  # type: ignore
+            try:
+                # If multiprocessing start method is 'fork' (i.e. on POSIX), use
+                # a pool of processes to compute results in parallel.
+                # Otherwise (i.e. on Windows), sequential computation will be "faster".
+                if mp.get_start_method(allow_none=False) == 'fork':
+                    with ProcessPoolExecutor() as executor:
+                        futures = [executor.submit(Backtest._mp_task, backtest_uuid, i)
+                                   for i in range(len(param_batches))]
+                        for future in _tqdm(as_completed(futures), total=len(futures)):
+                            batch_index, values = future.result()
+                            for value, params in zip(values, param_batches[batch_index]):
+                                heatmap[tuple(params.values())] = value
+                else:
+                    if os.name == 'posix':
+                        warnings.warn("For multiprocessing support in `Backtest.optimize()` "
+                                      "set multiprocessing start method to 'fork'.")
+                    for batch_index in _tqdm(range(len(param_batches))):
+                        _, values = Backtest._mp_task(backtest_uuid, batch_index)
+                        for value, params in zip(values, param_batches[batch_index]):
+                            heatmap[tuple(params.values())] = value
+            finally:
+                del Backtest._mp_backtests[backtest_uuid]
+
+            best_params = heatmap.idxmax()
+
+            if pd.isnull(best_params):
+                # No trade was made in any of the runs. Just make a random
+                # run so we get some, if empty, results
+                stats = self.run(**param_combos[0])
+            else:
+                stats = self.run(**dict(zip(heatmap.index.names, best_params)))
+
+            if return_heatmap:
+                return stats, heatmap
+            return stats
+
+        def _optimize_skopt() -> Union[pd.Series,
+                                       Tuple[pd.Series, pd.Series],
+                                       Tuple[pd.Series, pd.Series, dict]]:
+            try:
+                from skopt import forest_minimize
+                from skopt.space import Integer, Real, Categorical
+                from skopt.utils import use_named_args
+                from skopt.callbacks import DeltaXStopper
+                from skopt.learning import ExtraTreesRegressor
+            except ImportError:
+                raise ImportError("Need package 'scikit-optimize' for method='skopt'. "
+                                  "pip install scikit-optimize")
+
+            nonlocal max_tries
+            max_tries = (200 if max_tries is None else
+                         max(1, int(max_tries * _grid_size())) if 0 < max_tries <= 1 else
+                         max_tries)
+
+            dimensions = []
+            for key, values in kwargs.items():
+                values = np.asarray(values)
+                if values.dtype.kind in 'mM':  # timedelta, datetime64
+                    # these dtypes are unsupported in skopt, so convert to raw int
+                    # TODO: save dtype and convert back later
+                    values = values.astype(int)
+
+                if values.dtype.kind in 'iumM':
+                    dimensions.append(Integer(low=values.min(), high=values.max(), name=key))
+                elif values.dtype.kind == 'f':
+                    dimensions.append(Real(low=values.min(), high=values.max(), name=key))
+                else:
+                    dimensions.append(Categorical(values.tolist(), name=key, transform='onehot'))
+
+            # Avoid recomputing re-evaluations:
+            # "The objective has been evaluated at this point before."
+            # https://github.com/scikit-optimize/scikit-optimize/issues/302
+            memoized_run = lru_cache()(lambda tup: self.run(**dict(tup)))
+
+            # np.inf/np.nan breaks sklearn, np.finfo(float).max breaks skopt.plots.plot_objective
+            INVALID = 1e300
+
+            @use_named_args(dimensions=dimensions)
+            def objective_function(**params):
+                # Check constraints
+                # TODO: Adjust after https://github.com/scikit-optimize/scikit-optimize/pull/971
+                if not constraint(AttrDict(params)):
+                    return INVALID
+                res = memoized_run(tuple(params.items()))
+                value = -maximize(res)
+                if np.isnan(value):
+                    return INVALID
+                return value
+
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    'ignore', 'The objective has been evaluated at this point before.')
+
+                res = forest_minimize(
+                    func=objective_function,
+                    dimensions=dimensions,
+                    n_calls=max_tries,
+                    base_estimator=ExtraTreesRegressor(n_estimators=20, min_samples_leaf=2),
+                    acq_func='LCB',
+                    kappa=3,
+                    n_initial_points=min(max_tries, 20 + 3 * len(kwargs)),
+                    initial_point_generator='lhs',  # 'sobol' requires n_initial_points ~ 2**N
+                    callback=DeltaXStopper(9e-7),
+                    random_state=random_state)
+
+            stats = self.run(**dict(zip(kwargs.keys(), res.x)))
+            output = [stats]
+
+            if return_heatmap:
+                heatmap = pd.Series(dict(zip(map(tuple, res.x_iters), -res.func_vals)),
+                                    name=maximize_key)
+                heatmap.index.names = kwargs.keys()
+                heatmap = heatmap[heatmap != -INVALID]
+                heatmap.sort_index(inplace=True)
+                output.append(heatmap)
+
+            if return_optimization:
+                valid = res.func_vals != INVALID
+                res.x_iters = list(compress(res.x_iters, valid))
+                res.func_vals = res.func_vals[valid]
+                output.append(res)
+
+            return stats if len(output) == 1 else tuple(output)
+
+        if method == 'grid':
+            output = _optimize_grid()
+        elif method == 'skopt':
+            output = _optimize_skopt()
+        else:
+            raise ValueError(f"Method should be 'grid' or 'skopt', not {method!r}")
+        return output

     @staticmethod
     def _mp_task(backtest_uuid, batch_index):
```
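The core idea of `_optimize_grid()` above, distilled into a standalone sketch: each grid point is kept independently with probability `max_tries / grid_size`, so the expected number of runs matches the requested budget without materializing and shuffling the full grid. Names here (`random_grid`, `param_space`) are illustrative, not part of the library.

```python
import numpy as np
from itertools import product

def random_grid(param_space: dict, frac: float, seed=None):
    """Keep each combination with probability `frac` (Bernoulli subsampling)."""
    rand = np.random.RandomState(seed).random
    keys = list(param_space)
    return [dict(zip(keys, values))
            for values in product(*param_space.values())
            if rand() <= frac]

combos = random_grid({'fast': range(5, 30, 5), 'slow': range(10, 70, 10)},
                     frac=.3, seed=0)
# len(combos) is ~30% of the 5*6=30 full-grid combinations, varying per seed
```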
backtesting/test/_test.py

```diff
@@ -513,6 +513,7 @@ class TestOptimize(TestCase):
         self.assertRaises(TypeError, bt.optimize, maximize=15, **OPT_PARAMS)
         self.assertRaises(TypeError, bt.optimize, constraint=15, **OPT_PARAMS)
         self.assertRaises(ValueError, bt.optimize, constraint=lambda d: False, **OPT_PARAMS)
+        self.assertRaises(ValueError, bt.optimize, return_optimization=True, **OPT_PARAMS)

         res = bt.optimize(**OPT_PARAMS)
         self.assertIsInstance(res, pd.Series)
```
```diff
@@ -531,6 +532,40 @@
         with _tempfile() as f:
             bt.plot(filename=f, open_browser=False)

+    def test_method_skopt(self):
+        bt = Backtest(GOOG.iloc[:100], SmaCross)
+        res, heatmap, skopt_results = bt.optimize(
+            fast=range(2, 20), slow=np.arange(2, 20, dtype=object),
+            constraint=lambda p: p.fast < p.slow,
+            max_tries=30,
+            method='skopt',
+            return_optimization=True,
+            return_heatmap=True,
+            random_state=2)
+        self.assertIsInstance(res, pd.Series)
+        self.assertIsInstance(heatmap, pd.Series)
+        self.assertGreater(heatmap.max(), 1.1)
+        self.assertGreater(heatmap.min(), -2)
+        self.assertEqual(-skopt_results.fun, heatmap.max())
+        self.assertEqual(heatmap.index.tolist(), heatmap.dropna().index.unique().tolist())
+
+    def test_max_tries(self):
+        bt = Backtest(GOOG.iloc[:100], SmaCross)
+        OPT_PARAMS = dict(fast=range(2, 10, 2), slow=[2, 5, 7, 9])
+        for method, max_tries, random_state in (('grid', 5, 2),
+                                                ('grid', .3, 2),
+                                                ('skopt', 7, 0),
+                                                ('skopt', .45, 0)):
+            with self.subTest(method=method,
+                              max_tries=max_tries,
+                              random_state=random_state):
+                _, heatmap = bt.optimize(max_tries=max_tries,
+                                         method=method,
+                                         random_state=random_state,
+                                         return_heatmap=True,
+                                         **OPT_PARAMS)
+                self.assertEqual(len(heatmap), 6)
+
     def test_nowrite_df(self):
         # Test we don't write into passed data df by default.
         # Important for copy-on-write in Backtest.optimize()
```
doc/build.sh

```diff
@@ -90,6 +90,7 @@ for line in sys.stdin.readlines():
 grep -v $'\t''$' |
-while read -r line; do
+while IFS=$'\t' read -r file url; do
+    url=$(python -c 'import html, sys; print(html.unescape(sys.argv[-1]))' "$url")
     [ -f "$url" ] ||
         curl --silent --fail --retry 2 --user-agent 'Mozilla/5.0 Firefox 61' "$url" >/dev/null 2>&1 ||
         die "broken link in $file: $url"
```
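The added line exists because URLs in the generated documentation are HTML-escaped; `html.unescape()` (stdlib) reverses that before the link check:

```python
import html
# '&amp;' in an extracted href becomes a literal '&' before curl sees it:
assert html.unescape('https://example.com/doc?a=1&amp;b=2') == \
       'https://example.com/doc?a=1&b=2'
```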
doc/examples/Parameter Heatmap & Optimization.ipynb (new file, 1075 lines)
File diff suppressed because one or more lines are too long
doc/examples/Parameter Heatmap & Optimization.py (jupytext-paired script)

```diff
@@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: light
 #       format_version: '1.5'
-#       jupytext_version: 1.5.1
+#       jupytext_version: 1.6.0
 #   kernelspec:
 #     display_name: Python 3
 #     language: python
```
```diff
@@ -86,7 +86,12 @@ class Sma4Cross(Strategy):

 # -

-# It's not a robust strategy, but we can optimize it. Let's optimize our strategy on Google stock data.
+# It's not a robust strategy, but we can optimize it.
+#
+# [Grid search](https://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search)
+# is an exhaustive search through a set of specified sets of values of hyperparameters. One evaluates the performance for each set of parameters and finally selects the combination that performs best.
+#
+# Let's optimize our strategy on Google stock data using _randomized_ grid search over the parameter space, evaluating at most (approximately) 200 randomly chosen combinations:

 # +
 # %%time
```
```diff
@@ -104,13 +109,15 @@ stats, heatmap = backtest.optimize(
     n_exit=range(10, 25, 5),
     constraint=lambda p: p.n_exit < p.n_enter < p.n1 < p.n2,
     maximize='Equity Final [$]',
+    max_tries=200,
+    random_state=0,
     return_heatmap=True)
 # -

 # Notice `return_heatmap=True` parameter passed to
 # [`Backtest.optimize()`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.optimize).
 # It makes the function return a heatmap series along with the usual stats of the best run.
-# `heatmap` is a pandas Series indexed with a MultiIndex, a cartesian product of all permissible parameter values.
+# `heatmap` is a pandas Series indexed with a MultiIndex, a cartesian product of all permissible (tried) parameter values.
 # The series values are from the `maximize=` argument we provided.

 heatmap
```
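Since `heatmap` carries a MultiIndex over all tried parameter values, projecting it onto any two dimensions is a one-liner. A sketch of the kind of aggregation that `plot_heatmaps(heatmap, agg='mean')` in the next hunk performs, assuming the `n1`/`n2` index levels from the call above:

```python
# Mean objective value for each (n1, n2) pair, averaged over n_enter/n_exit:
hm = heatmap.groupby(['n1', 'n2']).mean().unstack()
print(hm)  # rows: n1, columns: n2; ready for e.g. imshow/seaborn plotting
```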
```diff
@@ -153,6 +160,59 @@ from backtesting.lib import plot_heatmaps
 plot_heatmaps(heatmap, agg='mean')
 # -

+# ## Model-based optimization
+#
+# Above, we used the _randomized grid search_ optimization method. Any kind of grid search, however, might be computationally expensive for large data sets. In the following example, we will use the
+# [_scikit-optimize_](https://scikit-optimize.github.io)
+# package to guide our optimization better informed using forests of decision trees.
+# The hyperparameter model is sequentially improved by evaluating the expensive function (the backtest) at the next best point, thereby hopefully converging to a set of optimal parameters with as few evaluations as possible.
+#
+# So, with `method="skopt"`:
+
+# +
+# %%capture
+
+# ! pip install scikit-optimize  # This is a run-time dependency
+
+# +
+# %%time
+
+stats_skopt, heatmap, optimize_result = backtest.optimize(
+    n1=[10, 100],      # Note: For method="skopt", we
+    n2=[20, 200],      # only need interval end-points
+    n_enter=[10, 40],
+    n_exit=[10, 30],
+    constraint=lambda p: p.n_exit < p.n_enter < p.n1 < p.n2,
+    maximize='Equity Final [$]',
+    method='skopt',
+    max_tries=200,
+    random_state=0,
+    return_heatmap=True,
+    return_optimization=True)
+# -
+
+heatmap.sort_values().iloc[-3:]
+
+# Notice how the optimization runs somewhat slower even though `max_tries=` is the same. But that's due to the sequential nature of the algorithm and should actually perform rather comparably even in cases of _much larger parameter spaces_ where grid search would effectively blow up, but likely (hopefully) reaching a better local optimum than a randomized search would.
+# A note of warning, again, to take steps to avoid
+# [overfitting](https://en.wikipedia.org/wiki/Overfitting)
+# insofar as possible.
+#
+# Understanding the impact of each parameter on the computed objective function is easy in two dimensions, but as the number of dimensions grows, partial dependency plots are increasingly useful.
+# [Plotting tools from _scikit-optimize_](https://scikit-optimize.github.io/stable/modules/plots.html)
+# take care of many of the more mundane things needed to make good and informative plots of the parameter space:
+
+# +
+from skopt.plots import plot_objective
+
+_ = plot_objective(optimize_result, n_points=10)
+
+# +
+from skopt.plots import plot_evaluations
+
+_ = plot_evaluations(optimize_result, bins=10)
+# -
+
 # Learn more by exploring further
 # [examples](https://kernc.github.io/backtesting.py/doc/backtesting/index.html#tutorials)
 # or find more framework options in the
```