Compare commits
56 Commits
master
...
hyperactiv
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a1e0c5bb4c | ||
|
|
e027c10b35 | ||
|
|
254987d5b7 | ||
|
|
750ca2da26 | ||
|
|
9e25c3a611 | ||
|
|
df9bed714a | ||
|
|
32daf1d3e4 | ||
|
|
09fd8676fb | ||
|
|
18841d1ce4 | ||
|
|
eb1f918d97 | ||
|
|
d16d6d76ef | ||
|
|
56bb6238d0 | ||
|
|
0b7d5422b7 | ||
|
|
22a31b9eee | ||
|
|
af151b256c | ||
|
|
2f8f471a77 | ||
|
|
eb71a4eadb | ||
|
|
cea07d4b94 | ||
|
|
ef8afab006 | ||
|
|
50f716c9b1 | ||
|
|
1586d0eeee | ||
|
|
21a9707a92 | ||
|
|
3d7c424aae | ||
|
|
d4c50aff20 | ||
|
|
0c86aa1d26 | ||
|
|
7eab98ef72 | ||
|
|
9990c70ae7 | ||
|
|
d76334411b | ||
|
|
c14f130b4a | ||
|
|
2d7ee0b137 | ||
|
|
edd2239790 | ||
|
|
7952faf268 | ||
|
|
0207233a45 | ||
|
|
56fa2f1e8a | ||
|
|
6dee3f3e76 | ||
|
|
a5c9da389f | ||
|
|
aa16daad95 | ||
|
|
93c8fb82dd | ||
|
|
5eb13b82e0 | ||
|
|
979826ca7e | ||
|
|
d010379ef5 | ||
|
|
ed9a589be9 | ||
|
|
474465aee7 | ||
|
|
2eb109ce63 | ||
|
|
559a888483 | ||
|
|
20e6ec57af | ||
|
|
150765d267 | ||
|
|
16bc1016d1 | ||
|
|
45317f953a | ||
|
|
7bca9d042d | ||
|
|
a6b9d2fd86 | ||
|
|
36a03f16e3 | ||
|
|
b5db3c82bf | ||
|
|
9a99effd06 | ||
|
|
607cd291dd | ||
|
|
e70335fa8e |
@@ -337,6 +337,31 @@ def optimize(start_date: str, finish_date: str, optimal_total: int, cpu: int, de
|
||||
optimize_mode(start_date, finish_date, optimal_total, cpu, csv, json)
|
||||
|
||||
|
||||
@cli.command()
@click.argument('start_date', required=True, type=str)
@click.argument('finish_date', required=True, type=str)
@click.argument('optimal_total', required=True, type=int)
@click.argument('optimizer', required=True, type=str)
@click.argument('iterations', required=True, type=int)
@click.option('--cpu', default=0, show_default=True, help='The number of CPU cores that Jesse is allowed to use. If set to 0, it will use as many as is available on your machine.')
@click.option('--debug/--no-debug', default=False, help='Displays detailed logs about the optimization. Use it if you are interested in the optimization algorithm.')
def optimize_hyperactive(start_date: str, finish_date: str, optimal_total: int, optimizer: str, iterations: int, cpu: int, debug: bool) -> None:
    """
    tunes the hyper-parameters of your strategy

    OPTIMIZER is the name of the optimization algorithm to use and
    ITERATIONS is the number of iterations it is allowed to run.
    """
    validate_cwd()

    # imported lazily, after the working directory has been validated
    from jesse.config import config
    config['app']['trading_mode'] = 'optimize'

    register_custom_exception_handler()

    # debug flag
    config['app']['debug_mode'] = debug

    # imported lazily so the optimization dependencies are only loaded when
    # this command is actually invoked
    from jesse.modes.optimize_hyperactive_mode import optimize_mode_hyperactive

    # NOTE: the callee takes `cpu` before `optimizer`/`iterations`, which is a
    # different order than the CLI arguments above.
    optimize_mode_hyperactive(start_date, finish_date, optimal_total, cpu, optimizer, iterations)
|
||||
|
||||
@cli.command()
|
||||
@click.argument('name', required=True, type=str)
|
||||
def make_strategy(name: str) -> None:
|
||||
|
||||
@@ -5,8 +5,9 @@ import random
|
||||
import string
|
||||
import sys
|
||||
import uuid
|
||||
from typing import List, Tuple, Union, Any
|
||||
from pprint import pprint
|
||||
from typing import List, Tuple, Union, Any
|
||||
|
||||
import arrow
|
||||
import click
|
||||
import numpy as np
|
||||
@@ -121,7 +122,7 @@ def date_to_timestamp(date: str) -> int:
|
||||
return arrow_to_timestamp(arrow.get(date, 'YYYY-MM-DD'))
|
||||
|
||||
|
||||
def dna_to_hp(strategy_hp, dna: str):
|
||||
def dna_to_hp(strategy_hp: list, dna: str) -> dict:
|
||||
hp = {}
|
||||
|
||||
for gene, h in zip(dna, strategy_hp):
|
||||
@@ -140,6 +141,23 @@ def dna_to_hp(strategy_hp, dna: str):
|
||||
return hp
|
||||
|
||||
|
||||
def hp_to_dna(strategy_hp: list, values: dict) -> str:
    """
    Encode hyperparameter values as a DNA string (one character per hyperparameter).

    Each value is rescaled from the hyperparameter's [min, max] range onto the
    character codes 40..119 via convert_number(), rounded, and turned into the
    matching character.

    :param strategy_hp: list of hyperparameter definitions; each one is a dict
        providing at least 'name', 'type', 'min' and 'max'
    :param values: mapping of hyperparameter name -> chosen value
    :return: the DNA string, with genes in the order of `strategy_hp`
    :raises TypeError: if a hyperparameter's type is neither int nor float
    """
    genes = []

    for h in strategy_hp:
        if h['type'] is not int and h['type'] is not float:
            raise TypeError('Only int and float types are implemented')

        gene_code = round(
            convert_number(h['max'], h['min'], 119, 40, values[h['name']])
        )
        genes.append(chr(gene_code))

    # join once instead of growing a string inside the loop
    return "".join(genes)
|
||||
|
||||
|
||||
def dump_exception() -> None:
|
||||
"""
|
||||
a useful debugging helper
|
||||
@@ -274,6 +292,29 @@ def get_strategy_class(strategy_name: str):
|
||||
return locate(f'strategies.{strategy_name}.{strategy_name}')
|
||||
|
||||
|
||||
def hp_rules_valid(hp, rules):
    """
    Check a set of hyperparameter values against pairwise comparison rules.

    :param hp: mapping of hyperparameter name -> value
    :param rules: iterable of dicts with the keys 'hp_name1', 'operator' and
        'hp_name2'; a rule holds when `hp[hp_name1] <operator> hp[hp_name2]`.
        Supported operators are < > <= >=.
    :return: True if every rule holds (also for an empty rule list), False otherwise
    :raises ValueError: if a rule uses an unsupported operator or refers to a
        hyperparameter name that is not present in `hp`
    """
    import operator

    comparators = {
        "<": operator.lt,
        ">": operator.gt,
        "<=": operator.le,
        ">=": operator.ge,
    }

    all_valid = True

    # Deliberately no early exit: every rule is validated, so a malformed rule
    # is always reported even when an earlier rule already failed.
    for rule in rules:
        compare = comparators.get(rule['operator'])
        if compare is None:
            raise ValueError("{} is not a supported operator. Choose from < > <= >=".format(rule['operator']))
        if rule['hp_name1'] not in hp:
            raise ValueError("The hp name {} doesn't exist.".format(rule['hp_name1']))
        if rule['hp_name2'] not in hp:
            raise ValueError("The hp name {} doesn't exist.".format(rule['hp_name2']))

        if not compare(hp[rule['hp_name1']], hp[rule['hp_name2']]):
            all_valid = False

    return all_valid
|
||||
|
||||
|
||||
def insecure_hash(msg: str) -> str:
    """
    Return the hexadecimal MD5 digest of `msg`.

    As the name says, this is not meant for anything security-sensitive.
    """
    digest = hashlib.md5()
    digest.update(msg.encode())
    return digest.hexdigest()
|
||||
|
||||
@@ -600,11 +641,11 @@ def round_decimals_down(number: np.ndarray, decimals: int = 2) -> float:
|
||||
Returns a value rounded down to a specific number of decimal places.
|
||||
"""
|
||||
if not isinstance(decimals, int):
|
||||
raise TypeError("decimal places must be an integer")
|
||||
raise TypeError("decimal places must be an integer")
|
||||
elif decimals < 0:
|
||||
raise ValueError("decimal places has to be 0 or more")
|
||||
raise ValueError("decimal places has to be 0 or more")
|
||||
elif decimals == 0:
|
||||
return np.floor(number)
|
||||
return np.floor(number)
|
||||
|
||||
factor = 10 ** decimals
|
||||
return np.floor(number * factor) / factor
|
||||
|
||||
334
jesse/modes/optimize_hyperactive_mode/__init__.py
Normal file
334
jesse/modes/optimize_hyperactive_mode/__init__.py
Normal file
@@ -0,0 +1,334 @@
|
||||
import ast
|
||||
import csv
|
||||
import os
|
||||
import traceback
|
||||
from math import log10
|
||||
from multiprocessing import cpu_count
|
||||
|
||||
import click
|
||||
import hyperactive
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from hyperactive.dashboards import ProgressBoard
|
||||
|
||||
import jesse.helpers as jh
|
||||
import jesse.services.logger as logger
|
||||
import jesse.services.required_candles as required_candles
|
||||
from jesse import exceptions
|
||||
from jesse.config import config
|
||||
from jesse.modes.backtest_mode import simulator
|
||||
from jesse.routes import router
|
||||
from jesse.services import metrics as stats
|
||||
from jesse.services.validators import validate_routes
|
||||
from jesse.store import store
|
||||
# from .overfitting import CSCV
|
||||
|
||||
from optimization_algorithm_config import optimization_config
|
||||
|
||||
os.environ['NUMEXPR_MAX_THREADS'] = str(cpu_count())
|
||||
|
||||
|
||||
class Optimizer():
    """
    Tunes the hyperparameters of the strategy of the single configured route
    with one of the optimizers of the `hyperactive` package.

    Every evaluated parameter set is appended as one row to
    `storage/optimize/csv/<study_name>.csv` (separator `;`), which is also
    offered as warm-start memory when an optimization is started again.
    """

    # ratio name (config value 'env.optimization.ratio') ->
    # (key in the metrics dict, upper bound used to normalize the ratio)
    _RATIOS = {
        'sharpe': ('sharpe_ratio', 5),
        'calmar': ('calmar_ratio', 30),
        'sortino': ('sortino_ratio', 15),
        'omega': ('omega_ratio', 5),
    }

    # supported optimizer name -> names of the settings that are read from
    # optimization_config[<optimizer name>] and passed to its constructor
    _OPTIMIZER_SETTINGS = {
        "RepulsingHillClimbingOptimizer": (
            'epsilon', 'distribution', 'n_neighbours', 'rand_rest_p', 'repulsion_factor'),
        "SimulatedAnnealingOptimizer": (
            'epsilon', 'distribution', 'n_neighbours', 'rand_rest_p', 'annealing_rate', 'start_temp'),
        "RandomSearchOptimizer": (),
        "RandomRestartHillClimbingOptimizer": (
            'epsilon', 'distribution', 'n_neighbours', 'rand_rest_p', 'n_iter_restart'),
        "RandomAnnealingOptimizer": (
            'epsilon', 'distribution', 'n_neighbours', 'rand_rest_p', 'annealing_rate', 'start_temp'),
        "ParallelTemperingOptimizer": (
            'population', 'n_iter_swap', 'rand_rest_p'),
        "ParticleSwarmOptimizer": (
            'population', 'inertia', 'cognitive_weight', 'social_weight', 'rand_rest_p'),
        "EvolutionStrategyOptimizer": (
            'population', 'mutation_rate', 'crossover_rate', 'rand_rest_p'),
    }

    def __init__(self, training_candles, optimal_total: int, cpu_cores: int, optimizer: str, iterations: int) -> None:
        """
        :param training_candles: candles used for every backtest; indexed as
            training_candles[jh.key(exchange, symbol)]['candles']
        :param optimal_total: number of trades at which the trade-count factor
            of the score reaches its maximum of 1
        :param cpu_cores: number of parallel jobs; 0 means all available cores
        :param optimizer: name of the hyperactive optimizer to use
        :param iterations: number of iterations handed to hyperactive
        :raises NotImplementedError: if not exactly one route is configured
        :raises exceptions.InvalidStrategy: if the strategy defines no hyperparameters
        :raises ValueError: if more cores are requested than the machine has
        """
        if len(router.routes) != 1:
            raise NotImplementedError('optimize_mode mode only supports one route at the moment')

        route = router.routes[0]
        self.strategy_name = route.strategy_name
        self.optimal_total = optimal_total
        self.exchange = route.exchange
        self.symbol = route.symbol
        self.timeframe = route.timeframe

        # hyperparameters() / hyperparameters_rules() are called on the class
        # itself, with None standing in for `self`
        StrategyClass = jh.get_strategy_class(self.strategy_name)
        self.strategy_hp = StrategyClass.hyperparameters(None)
        if hasattr(StrategyClass, 'hyperparameters_rules'):
            self.hyperparameters_rules = StrategyClass.hyperparameters_rules(None)
        else:
            self.hyperparameters_rules = None
        self.solution_len = len(self.strategy_hp)
        self.optimizer = optimizer
        self.iterations = iterations

        if self.solution_len == 0:
            raise exceptions.InvalidStrategy('Targeted strategy does not implement a valid hyperparameters() method.')

        if cpu_cores > cpu_count():
            raise ValueError(f'Entered cpu cores number is more than available on this machine which is {cpu_count()}')
        elif cpu_cores == 0:
            self.cpu_cores = cpu_count()
        else:
            self.cpu_cores = cpu_cores

        self.training_candles = training_candles

        # date part (before the 'T') of the first and the last training candle
        key = jh.key(self.exchange, self.symbol)
        route_candles = self.training_candles[key]['candles']
        training_candles_start_date = jh.timestamp_to_time(route_candles[0][0]).split('T')[0]
        training_candles_finish_date = jh.timestamp_to_time(route_candles[-1][0]).split('T')[0]

        # loaded once here and injected into the store again for every run
        self.training_initial_candles = [
            required_candles.load_required_candles(
                c[0], c[1], training_candles_start_date, training_candles_finish_date
            )
            for c in config['app']['considering_candles']
        ]

        self.study_name = f'{self.strategy_name}-{self.exchange}-{self.symbol}-{ self.timeframe}-{self.optimizer}'

        self.path = f'storage/optimize/csv/{self.study_name}.csv'
        os.makedirs('./storage/optimize/csv', exist_ok=True)

    def objective_function(self, hp):
        """
        Backtest one parameter set, log it to the CSV file and return its score.

        :param hp: the parameter object handed over by hyperactive; its
            `para_dict` attribute maps hyperparameter names to the chosen values
        :return: the score, or NaN when the hyperparameter rules are violated,
            the chosen ratio is not positive, or the evaluation raised
            (the exception is logged, not propagated)
        """
        score = np.nan
        try:
            score = self._evaluate(hp)
        except Exception as e:
            # best effort: a failing parameter set must not stop the whole search
            logger.error("".join(traceback.TracebackException.from_exception(e).format()))
        finally:
            try:
                self._append_result(hp, score)
            finally:
                # reset store, even if writing the result failed
                store.reset()

        return score

    def _evaluate(self, hp):
        """Run the backtest for `hp` and return its score (NaN if it does not qualify)."""
        # The rules are checked against the plain dict of values so that the
        # membership tests in hp_rules_valid() work on a real mapping.
        if self.hyperparameters_rules is not None and not jh.hp_rules_valid(hp.para_dict,
                                                                            self.hyperparameters_rules):
            return np.nan

        # init candle store
        store.candles.init_storage(5000)

        # inject required TRAINING candles to the candle store
        for num, c in enumerate(config['app']['considering_candles']):
            required_candles.inject_required_candles_to_store(
                self.training_initial_candles[num],
                c[0],
                c[1]
            )

        # run backtest simulation
        simulator(self.training_candles, hp)

        training_data = stats.trades(store.completed_trades.trades, store.app.daily_balance)

        # log-scaled share of the desired number of trades, capped at 1
        total_effect_rate = log10(training_data['total']) / log10(self.optimal_total)
        total_effect_rate = min(total_effect_rate, 1)

        ratio_config = jh.get_config('env.optimization.ratio', 'sharpe')
        if ratio_config not in self._RATIOS:
            raise ValueError(f'The entered ratio configuration `{ratio_config}` for the optimization is unknown. Choose between sharpe, calmar, sortino and omega.')
        metric_key, upper_bound = self._RATIOS[ratio_config]
        ratio = training_data[metric_key]
        ratio_normalized = jh.normalize(ratio, -.5, upper_bound)

        if ratio > 0:
            return total_effect_rate * ratio_normalized
        return np.nan

    def _append_result(self, hp, score) -> None:
        """Append the values of `hp`, the score and the DNA as one row to the CSV file."""
        # work on a copy so the optimizer's own parameter dict stays untouched
        params = dict(hp.para_dict)

        try:
            dna = jh.hp_to_dna(self.strategy_hp, params)
        except TypeError:
            # hp_to_dna() only encodes int and float hyperparameters, while the
            # search space may also contain bool ones; log the row without DNA.
            dna = np.nan

        # same column order as the header written by run()
        row = [params[st_hp['name']] for st_hp in self.strategy_hp] + [score, dna]

        # NOTE: persisting store.app.daily_balance per row (input for the CSCV
        # overfitting validation) is currently disabled.

        # newline='' as required by the csv module, to avoid blank rows on Windows
        with open(self.path, "a", newline='') as f:
            csv.writer(f, delimiter=';').writerow(row)

    @staticmethod
    def _decimal_places(number) -> int:
        """Return the number of decimal places in the string form of `number` (handles '1e-05')."""
        from decimal import Decimal

        exponent = Decimal(str(number)).as_tuple().exponent
        return max(0, -exponent)

    def get_search_space(self):
        """
        Build the hyperactive search space from the strategy's hyperparameters.

        int   -> min, min + step, ... never exceeding max (default step 1)
        float -> the same grid, rounded to the decimals of step/min (default step 0.1)
        bool  -> [True, False]

        :return: dict mapping hyperparameter name -> list of candidate values
        :raises TypeError: for any other hyperparameter type
        """
        hp = {}
        for st_hp in self.strategy_hp:
            name = st_hp['name']
            if st_hp['type'] is int:
                # .get() instead of writing the default back into the strategy's dict
                step = st_hp.get('step', 1)
                # stop at max + 1 so max is included but never overshot
                hp[name] = list(range(st_hp['min'], st_hp['max'] + 1, step))
            elif st_hp['type'] is float:
                step = st_hp.get('step', 0.1)
                low, high = st_hp['min'], st_hp['max']
                decimals = max(self._decimal_places(step), self._decimal_places(low))
                # number of whole steps that fit into [low, high]; the small
                # tolerance absorbs floating point error in the division
                steps = int(np.floor((high - low) / step + 1e-9))
                # round (not truncate) so values like 0.7999999999999999 become 0.8
                hp[name] = list(np.round(low + step * np.arange(steps + 1), decimals))
            elif st_hp['type'] is bool:
                hp[name] = [True, False]
            else:
                raise TypeError('Only int, bool and float types are implemented')
        return hp

    def _load_previous_results(self):
        """
        Return earlier results of this study for warm-starting, or None.

        None is returned when no usable CSV file exists or when the user
        declines to continue from the previous results.
        """
        if not jh.file_exists(self.path):
            return None

        try:
            mem = pd.read_csv(self.path, sep=";", na_values='nan')
        except pd.errors.EmptyDataError:
            # a zero-byte file holds nothing to continue from
            return None

        # the DNA column is only informational and not part of the search space
        mem = mem.drop(columns='dna', errors='ignore')

        if not mem.empty and not click.confirm(
                f'Previous optimization results for {self.study_name} exists. Continue?',
                default=True,
        ):
            return None
        return mem

    def _build_optimizer(self):
        """Instantiate the selected hyperactive optimizer with its settings from optimization_config."""
        setting_names = self._OPTIMIZER_SETTINGS[self.optimizer]
        optimizer_class = getattr(hyperactive, self.optimizer)
        if not setting_names:
            return optimizer_class()

        settings = optimization_config[self.optimizer]
        return optimizer_class(**{name: settings[name] for name in setting_names})

    def run(self):
        """
        Run the optimization.

        :raises ValueError: if the selected optimizer is not supported
        """
        # fail fast, before any file is read or the user is prompted
        if self.optimizer not in self._OPTIMIZER_SETTINGS:
            raise ValueError(f'Entered optimizer which is {self.optimizer} is not known.')

        hyper = hyperactive.Hyperactive(distribution="multiprocessing",
                                        verbosity=["progress_bar", "print_results", "print_times"])

        self.search_space = self.get_search_space()

        # NOTE: deriving n_iter from the number of search space combinations is
        # a possible later improvement; for now the given iterations are used.

        mem = self._load_previous_results()
        optimizer = self._build_optimizer()

        search_options = {
            'optimizer': optimizer,
            'n_iter': self.iterations,
            'n_jobs': self.cpu_cores,
        }

        if mem is None or mem.empty:
            # start fresh: (re)create the file with only the header row
            header = pd.DataFrame(columns=list(self.search_space.keys()) + ["score", "dna"])
            header.to_csv(self.path, sep=";", index=False, na_rep='nan')
        else:
            search_options['memory_warm_start'] = mem

        hyper.add_search(self.objective_function, self.search_space, **search_options)
        hyper.run()
|
||||
|
||||
# def validate_optimization(self, cscv_nbins: int = 10):
|
||||
# with open(self.path, "r") as f:
|
||||
# results = pd.read_csv(f, sep=";", converters={'daily_balance': from_np_array}, na_values='nan')
|
||||
# results.dropna(inplace=True)
|
||||
# results.drop("score", 1, inplace=True)
|
||||
# multi_index = results.columns.tolist()
|
||||
# multi_index.remove('daily_balance')
|
||||
# results.set_index(multi_index, drop=True, inplace=True)
|
||||
# new_columns = results.index.to_flat_index()
|
||||
#
|
||||
# daily_balance = results.daily_balance.to_numpy()
|
||||
# prepared = prepare_daily_percentage(daily_balance)
|
||||
# vstack = np.vstack(prepared)
|
||||
#
|
||||
# daily_percentage = pd.DataFrame(vstack).transpose()
|
||||
# daily_percentage.columns = new_columns
|
||||
#
|
||||
# cscv_objective = lambda r: r.mean()
|
||||
# cscv = CSCV(n_bins=cscv_nbins, objective=cscv_objective)
|
||||
# cscv.add_daily_returns(daily_percentage)
|
||||
# cscv.estimate_overfitting(name=self.study_name)
|
||||
|
||||
|
||||
# first make same length
|
||||
# forward fill returns
|
||||
# return percentage change
|
||||
def prepare_daily_percentage(a):
    """
    Turn balance series of different lengths into percentage changes.

    The series are written into the rows of one NaN-padded 2D array (as wide
    as the longest series), passed through jh.np_ffill(..., 1) and then
    converted into the change between consecutive columns, in percent.
    (jh.np_ffill presumably forward-fills the NaN padding along axis 1 —
    confirm against its implementation.)

    :param a: sequence of 1D sequences of numbers
    :return: 2D array with one column less than the longest input series
    """
    longest = max(len(series) for series in a)
    padded = np.full((len(a), longest), np.nan)
    for row, series in enumerate(a):
        padded[row, :len(series)] = series

    filled = jh.np_ffill(padded, 1)
    previous = filled[:, :-1]
    return np.diff(filled) / previous * 100
|
||||
|
||||
|
||||
def optimize_mode_hyperactive(start_date: str, finish_date: str, optimal_total: int, cpu_cores: int, optimizer: str,
                              iterations: int) -> None:
    """
    Entry point of the hyperactive optimize mode.

    Clears the screen, validates the routes, loads the candles between
    `start_date` and `finish_date` and runs an Optimizer on them. All loaded
    candles are handed to the Optimizer; this function does not split off
    any testing candles.

    :param start_date: first date of the candles to load
    :param finish_date: last date of the candles to load
    :param optimal_total: passed on to Optimizer
    :param cpu_cores: passed on to Optimizer
    :param optimizer: name of the optimizer, passed on to Optimizer
    :param iterations: passed on to Optimizer
    """
    # clear the screen
    click.clear()

    # validate routes
    validate_routes(router)

    # load the historical candles used for the optimization
    candles = get_training_candles(start_date, finish_date)

    runner = Optimizer(candles, optimal_total, cpu_cores, optimizer, iterations)

    print('Starting optimization...')

    runner.run()

    # NOTE: the validation step (Optimizer.validate_optimization) that would
    # follow here is currently disabled.
|
||||
|
||||
|
||||
def get_training_candles(start_date_str: str, finish_date_str: str):
    """
    Load the candles between the two dates with the backtest mode's loader.

    :param start_date_str: start date, passed on unchanged to load_candles
    :param finish_date_str: finish date, passed on unchanged to load_candles
    :return: whatever load_candles returns (per the original note it tries
        the cache first and the database second — not visible from here)
    """
    # imported inside the function rather than at module level
    from jesse.modes.backtest_mode import load_candles

    candles = load_candles(start_date_str, finish_date_str)
    return candles
|
||||
|
||||
|
||||
def from_np_array(array_string):
    """
    Parse the string form of a Python literal (e.g. a nested list) into a numpy array.

    The string is evaluated with ast.literal_eval, so only literals are accepted.
    """
    parsed = ast.literal_eval(array_string)
    return np.array(parsed)
|
||||
151
jesse/modes/optimize_hyperactive_mode/overfitting.py
Normal file
151
jesse/modes/optimize_hyperactive_mode/overfitting.py
Normal file
@@ -0,0 +1,151 @@
|
||||
import itertools as itr
|
||||
import math
|
||||
import os
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from statsmodels.distributions.empirical_distribution import ECDF
|
||||
|
||||
|
||||
class CSCV(object):
    """Combinatorially symmetric cross-validation algorithm.

    Calculate backtesting about overfitting probability distribution and performance degradation.

    Attributes:
        n_bins: An int, the number of bins the returns are split into. Half of them
            (n_bins // 2) form the in-sample set of every combination, the rest the
            out-of-sample set. Default is 10.
        objective: A function computing the performance of in sample (IS) and out of
            sample (OOS) returns. Default is lambda r: r.mean().
        bins_enumeration: A list with one set of in-sample bin numbers per combination.
        Rs: A list with one series of IS performances per combination.
        R_bars: A list with one series of OOS performances per combination.

    """

    def __init__(self, n_bins=10, objective=lambda r: r.mean()):
        if n_bins < 2:
            raise ValueError('n_bins must be at least 2')

        self.n_bins = n_bins
        self.objective = objective
        # every way of choosing half of the bins as the in-sample set
        self.bins_enumeration = [set(combo) for combo in itr.combinations(range(n_bins), n_bins // 2)]

        self.Rs = [pd.Series(dtype=float) for _ in self.bins_enumeration]
        self.R_bars = [pd.Series(dtype=float) for _ in self.bins_enumeration]

    @staticmethod
    def _extend(existing, new):
        """Return `existing` followed by `new` (Series.append was removed in pandas 2.0)."""
        if existing.empty:
            return new.copy()
        return pd.concat([existing, new])

    @staticmethod
    def _select_best(frame, is_best):
        """Return the values of `frame` at the positions flagged in `is_best`, ordered by combination."""
        # NOTE(review): multiplying by the boolean mask and dropping zeros also
        # drops selected values that are exactly 0 — confirm this is acceptable.
        picked = (frame * is_best).unstack().dropna()
        return picked[picked != 0].sort_index(level=-1)

    def add_daily_returns(self, daily_returns):
        """Add daily_returns in algorithm.

        Args:
            daily_returns: A dataframe of trading daily_returns, one row per day and
                one column per strategy. Rows that do not fill a whole bin at the
                end are ignored.

        Raises:
            ValueError: If there are fewer rows than bins.

        """
        bin_size = daily_returns.shape[0] // self.n_bins
        if bin_size == 0:
            raise ValueError('daily_returns needs at least n_bins rows')

        bins = [daily_returns.iloc[i * bin_size: (i + 1) * bin_size] for i in range(self.n_bins)]
        all_bins = set(range(self.n_bins))

        for set_id, is_set in enumerate(self.bins_enumeration):
            oos_set = all_bins - is_set
            # sorted() keeps the bins in chronological order
            is_returns = pd.concat([bins[i] for i in sorted(is_set)])
            oos_returns = pd.concat([bins[i] for i in sorted(oos_set)])
            R = self.objective(is_returns)
            R_bar = self.objective(oos_returns)
            self.Rs[set_id] = self._extend(self.Rs[set_id], R)
            self.R_bars[set_id] = self._extend(self.R_bars[set_id], R_bar)

    def estimate_overfitting(self, name: str):
        """Estimate overfitting probability.

        Generate the result on Combinatorially symmetric cross-validation algorithm
        and save the related analysis charts as PNG files.

        Args:
            name: A str, the name of the folder below storage/optimize/validation/
                that the charts are saved in.

        Returns:
            A dict of result include:
            pbo_test: A float of overfitting probability (NaN without any logits).
            logits: A list of estimated logits of OOS rankings.
            R_n_star: A list of IS performance of the strategies that has the best ranking in IS.
            R_bar_n_star: A list of find the OOS performance of the strategies that has the best ranking in IS.
            dom_df: A dataframe of optimized_IS, non_optimized_OOS data.

        """
        # calculate strategy performance in IS(R_df) and OOS(R_bar_df)
        R_df = pd.DataFrame(self.Rs)
        R_bar_df = pd.DataFrame(self.R_bars)

        # calculate ranking of the strategies
        R_rank_df = R_df.rank(axis=1, ascending=False, method='first')
        R_bar_rank_df = R_bar_df.rank(axis=1, ascending=False, method='first')

        is_best = R_rank_df == 1

        # find the IS performance of the strategies that has the best ranking in IS
        r_star_series = self._select_best(R_df, is_best)

        # find the OOS performance of the strategies that has the best ranking in IS
        r_bar_star_series = self._select_best(R_bar_df, is_best)

        # find the OOS ranking of strategies which has the best ranking in IS
        r_bar_rank_series = self._select_best(R_bar_rank_df, is_best)

        # estimate logits of OOS rankings
        relative_rank = 1 - (r_bar_rank_series / (len(R_df.columns) + 1))
        logits = relative_rank.map(lambda p: math.log(p / (1 - p)))

        # probability of overfitting: share of combinations with a negative logit
        pbo = (logits < 0).sum() / len(logits) if len(logits) else float('nan')

        # stochastic dominance
        if len(r_bar_star_series) != 0:
            y = np.linspace(
                min(r_bar_star_series), max(r_bar_star_series), endpoint=True, num=1000
            )

            # build CDF of the OOS performance of the best candidate in IS
            R_bar_n_star_cdf = ECDF(r_bar_star_series.values)
            optimized = R_bar_n_star_cdf(y)

            # build CDF of the median OOS performance per combination
            R_bar_mean_cdf = ECDF(R_bar_df.median(axis=1).values)
            non_optimized = R_bar_mean_cdf(y)

            dom_df = pd.DataFrame(
                dict(optimized_IS=optimized, non_optimized_OOS=non_optimized)
                , index=y)
            dom_df["SD2"] = -(dom_df.non_optimized_OOS - dom_df.optimized_IS).cumsum()
        else:
            dom_df = pd.DataFrame(columns=['optimized_IS', 'non_optimized_OOS', 'SD2'])

        ret = {
            'pbo_test': pbo,
            'logits': logits.to_list(),
            'R_n_star': r_star_series.to_list(),
            'R_bar_n_star': r_bar_star_series.to_list(),
            'dom_df': dom_df,
        }

        path = 'storage/optimize/validation/{}'.format(name)
        os.makedirs('./storage/optimize/validation/{}'.format(name), exist_ok=True)

        # Every chart gets its own figure, which is closed after saving;
        # otherwise later charts would be drawn on top of the earlier ones.

        # probability distribution
        plt.figure()
        plt.title('Probability Distribution')
        plt.hist(x=[l for l in ret['logits'] if l > -10000], bins='auto')
        plt.xlabel('Logits')
        plt.ylabel('Frequency')
        plt.savefig('{}/Probability Distribution.png'.format(path))
        plt.close()

        # performance degradation
        plt.figure()
        plt.title('Performance degradation')
        plt.scatter(ret['R_n_star'], ret['R_bar_n_star'])
        plt.xlabel('In-sample Performance')
        plt.ylabel('Out-of-sample Performance')
        plt.savefig('{}/Performance degradation.png'.format(path))
        plt.close()

        # first and second Stochastic dominance
        # (DataFrame.plot creates its own figure; an empty frame cannot be plotted)
        if not dom_df.empty:
            ax = dom_df.plot(secondary_y=['SD2'], title='Stochastic dominance')
            ax.set_xlabel('Performance optimized vs non-optimized')
            ax.set_ylabel('Frequency')
            figure = ax.get_figure()
            figure.savefig('{}/Stochastic dominance.png'.format(path))
            plt.close(figure)

        print('Validation plots saved in {}'.format(path))

        return ret
|
||||
@@ -30,8 +30,6 @@ def test_base_asset():
|
||||
assert jh.base_asset('DEFI-USD') == 'DEFI'
|
||||
|
||||
|
||||
|
||||
|
||||
def test_binary_search():
|
||||
arr = [0, 11, 22, 33, 44, 54, 55]
|
||||
|
||||
@@ -104,6 +102,18 @@ def test_dna_to_hp():
|
||||
assert jh.dna_to_hp(strategy_hp, dna) == {'hp1': 0.08518987341772151, 'hp2': 3}
|
||||
|
||||
|
||||
def test_hp_to_dna():
    # one float and one int hyperparameter, encoded as one character each
    hyperparameters = [
        {'name': 'hp1', 'type': float, 'min': 0.01, 'max': 1.0, 'default': 0.09},
        {'name': 'hp2', 'type': int, 'min': 1, 'max': 10, 'default': 2},
    ]
    chosen_values = {
        "hp1": 0.08518987341772151,
        "hp2": 3,
    }
    expected_dna = '.:'

    encoded = jh.hp_to_dna(hyperparameters, chosen_values)

    assert encoded == expected_dna
|
||||
|
||||
|
||||
def test_dump_exception():
|
||||
# uses database, which is not existing during testing
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user