mirror of https://github.com/thomashirtz/portfolio-management.git
synced 2022-03-03 23:56:42 +03:00

Initial commit
17  .gitignore  vendored  Normal file
@@ -0,0 +1,17 @@
# Files recommended by JetBrains
**/.idea/workspace.xml
**/.idea/tasks.xml

/.idea/

# Pycache folders generated for speeding up execution
__pycache__/

# Pytest folders generated when testing
/.pytest_cache/

# Distribution & Packaging
*.egg-info/

# Data related
/databases/

33  .pre-commit-config.yaml  Normal file
@@ -0,0 +1,33 @@
# todo working on setting up pre-commit
# https://pre-commit.com/
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
  # isort should run before black as black sometimes tweaks the isort output
  - repo: https://github.com/PyCQA/isort
    rev: 5.8.0
    hooks:
      - id: isort
  # https://github.com/python/black#version-control-integration
  - repo: https://github.com/psf/black
    rev: 21.5b2
    hooks:
      - id: black
  - repo: https://github.com/keewis/blackdoc
    rev: v0.3.3
    hooks:
      - id: blackdoc
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
  - repo: https://github.com/pre-commit/mirrors-mypy
    # version must correspond to the one in .github/workflows/ci-additional.yaml
    rev: v0.812
    hooks:
      - id: mypy
        exclude: "properties|asv_bench"

6  notebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb  Normal file
@@ -0,0 +1,6 @@
{
 "cells": [],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}

1207  notebooks/.ipynb_checkpoints/database-checkpoint.ipynb  Normal file
File diff suppressed because it is too large

1207  notebooks/database.ipynb  Normal file
File diff suppressed because it is too large

6  portfolio_management/__init__.py  Normal file
@@ -0,0 +1,6 @@
from gym.envs.registration import register

register(
    id='Portfolio-v0',
    entry_point='portfolio_management.environment:PortfolioEnv',
)

21  portfolio_management/api.py  Normal file
@@ -0,0 +1,21 @@
import pandas as pd
from binance import Client


def get_kline_dataframe(symbol: str, interval: str, start: str, end: str):
    columns = [
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume',
        'ignore'
    ]

    client = Client()
    klines = client.get_historical_klines(symbol, interval, start_str=start, end_str=end)

    dataframe = pd.DataFrame(klines, columns=columns)
    dataframe.index = dataframe['close_time']

    dataframe['open_time'] = pd.to_datetime(dataframe['open_time'] * 1_000_000)
    dataframe['close_time'] = pd.to_datetime((dataframe['close_time'] + 1) * 1_000_000)
    return dataframe

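For reference, a minimal usage sketch of get_kline_dataframe; the interval constant, symbol, and dates are the same values used in the tests further down (Binance timestamps are in milliseconds, hence the conversion to nanoseconds above):

    from binance import Client
    from portfolio_management.api import get_kline_dataframe

    # Pull 30-minute ETH/BTC candles over two days.
    df = get_kline_dataframe(
        symbol="ETHBTC",
        interval=Client.KLINE_INTERVAL_30MINUTE,
        start="2017-11-12",
        end="2017-11-14",
    )
    print(df[['open_time', 'close_time', 'open', 'close']].head())
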
0  portfolio_management/database/__init__.py  Normal file

65  portfolio_management/database/bases.py  Normal file
@@ -0,0 +1,65 @@
from sqlalchemy import Float
from sqlalchemy import Column
from sqlalchemy import String
from sqlalchemy import DateTime
from sqlalchemy import Integer
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base


Base = declarative_base()


class Symbol(Base):
    __tablename__ = "symbol"

    id = Column('id', Integer, primary_key=True)
    name = Column('name', String)
    # measurements = relationship("Data", backref="symbol", cascade_backrefs=False)

    def __repr__(self):
        return f"{type(self).__name__}(id={self.id}, name={self.name})"


class Time(Base):
    __tablename__ = "time"

    id = Column('id', Integer, primary_key=True)
    value = Column('value', DateTime)
    # data = relationship("Data", backref="datetime", cascade_backrefs=False)
    # todo check why it errors
    # https://docs.sqlalchemy.org/en/13/orm/join_conditions.html#handling-multiple-join-paths

    def __repr__(self):
        return f"{type(self).__name__}(id={self.id}, value={self.value})"


class Property(Base):
    __tablename__ = "property"

    id = Column('id', Integer, primary_key=True)
    name = Column('name', String)
    # data = relationship("Data", backref="property", cascade_backrefs=False)

    def __repr__(self):
        return f"{type(self).__name__}(id={self.id}, name={self.name})"


class Data(Base):
    __tablename__ = "data"

    id = Column('id', Integer, primary_key=True)
    value = Column('value', Float)

    symbol_id = Column('symbol_id', Integer, ForeignKey('symbol.id'))
    property_id = Column('property_id', Integer, ForeignKey('property.id'))

    open_time_id = Column(Integer, ForeignKey('time.id'))
    close_time_id = Column(Integer, ForeignKey('time.id'))

    open_time = relationship("Time", foreign_keys=[open_time_id])
    close_time = relationship("Time", foreign_keys=[close_time_id])

    def __repr__(self):
        # Data has no 'name' column, so the repr reports its value instead.
        return f"{type(self).__name__}(id={self.id}, value={self.value}, symbol_id={self.symbol_id}, property_id={self.property_id}, open_time_id={self.open_time_id}, close_time_id={self.close_time_id})"  # noqa

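A minimal sketch of how this schema can be materialized and queried against a throwaway in-memory SQLite engine; the project itself goes through Initialization and the file-based engine below, so the ':memory:' URL here is only for illustration:

    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker
    from portfolio_management.database.bases import Base, Symbol

    engine = create_engine('sqlite:///:memory:')   # illustration only
    Base.metadata.create_all(bind=engine)          # creates the symbol, time, property and data tables
    session = sessionmaker(bind=engine)()
    session.add(Symbol(name='ETHBTC'))
    session.commit()
    print(session.query(Symbol).all())             # [Symbol(id=1, name=ETHBTC)]
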
39  portfolio_management/database/core.py  Normal file
@@ -0,0 +1,39 @@
import yaml
from typing import List
from pathlib import Path

from portfolio_management.database.insert import Insert
from portfolio_management.database.initialization import Initialization
from portfolio_management.api import get_kline_dataframe


def setup_database(
    folder_path: str,
    database_name: str,
    symbol_list: List[str],
    interval: str,
    start: str,
    end: str,
):

    initialization = Initialization(folder_path=folder_path, database_name=database_name)
    initialization.run(symbol_list=symbol_list, reset_tables=True)
    insert = Insert(folder_path=folder_path, database_name=database_name)

    for symbol in symbol_list:
        data = get_kline_dataframe(symbol=symbol, interval=interval, start=start, end=end)
        insert.run(symbol=symbol, data=data)

    config = {
        "folder_path": folder_path,
        "database_name": database_name,
        "symbol_list": symbol_list,
        "interval": interval,
        "start": start,
        "end": end,
    }
    path_yaml_file = Path(folder_path).joinpath(database_name).with_suffix('.yaml')
    with open(path_yaml_file, 'w') as f:
        yaml.dump(config, f, sort_keys=False)
    print('config saved')
    print()

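A minimal usage sketch of setup_database, mirroring the values used in tests/database/test_core.py further down; the folder path here is an assumption (any writable local directory works):

    from binance import Client
    from portfolio_management.database.core import setup_database

    setup_database(
        folder_path='databases',                  # assumption: any writable local folder
        database_name='test',
        symbol_list=['ETHBTC', 'BNBBTC'],
        interval=Client.KLINE_INTERVAL_30MINUTE,
        start='2017-11-12',
        end='2017-11-14',
    )
    # Side effects: databases/test.db (the SQLite file) and databases/test.yaml (the config dump).
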
58  portfolio_management/database/initialization.py  Normal file
@@ -0,0 +1,58 @@
from typing import Optional
from typing import Type

from sqlalchemy import inspect
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from portfolio_management import utilities

from portfolio_management.database.bases import Base
from portfolio_management.database.bases import Symbol
from portfolio_management.database.bases import Property

from portfolio_management.database.utilities import session_scope
from portfolio_management.database.utilities import silent_insert
from portfolio_management.database.utilities import get_engine_url
from portfolio_management.database.utilities import get_path_database


class Initialization:
    def __init__(self, folder_path: str, database_name: str, echo=False):
        self.database_name = database_name
        self.folder_path = folder_path

        utilities.create_folders(get_path_database(folder_path, database_name).parent)

        self.engine_url = get_engine_url(folder_path, database_name)
        self.engine = create_engine(self.engine_url, echo=echo)
        self.Session = sessionmaker(bind=self.engine)

    def run(self, symbol_list: Optional[list], reset_tables=False) -> None:

        type_list = [
            'open', 'high', 'low', 'close', 'volume', 'quote_asset_volume',
            'number_of_trades', 'taker_buy_base_asset_volume',
            'taker_buy_quote_asset_volume'
        ]

        if reset_tables:
            Base.metadata.drop_all(bind=self.engine)

        if self.database_is_empty(self.engine) or reset_tables:
            Base.metadata.create_all(bind=self.engine)
            with session_scope(self.Session) as session:
                self.table_initialization(session, Property, type_list)
                self.table_initialization(session, Symbol, symbol_list)

    @staticmethod
    def table_initialization(session, base: Type[Base], name_list: list) -> None:
        for name in name_list:
            instance = base(name=name)
            silent_insert(session, instance)

    @staticmethod
    def database_is_empty(engine):
        table_names = inspect(engine).get_table_names()
        is_empty = table_names == []
        return is_empty

58  portfolio_management/database/insert.py  Normal file
@@ -0,0 +1,58 @@
import pandas as pd

from portfolio_management.database.bases import Time
from portfolio_management.database.bases import Data

from portfolio_management.database.retrieve import get_symbol_id
from portfolio_management.database.retrieve import get_time_mapping
from portfolio_management.database.retrieve import get_property_mapping

from portfolio_management.database.utilities import session_scope
from portfolio_management.database.utilities import get_sessionmaker
from portfolio_management.database.utilities import silent_bulk_insert


class Insert:
    def __init__(
        self,
        folder_path: str,
        database_name: str,
        echo: bool = False
    ):
        self.Session = get_sessionmaker(folder_path, database_name, echo)

    def run(self, symbol: str, data: pd.DataFrame) -> None:

        with session_scope(self.Session) as session:
            self._insert_datetime(session, data)
            self._insert_data(session, symbol, data)

    @staticmethod
    def _insert_datetime(session, data: pd.DataFrame) -> None:
        datetime_set = set(data['open_time'])
        datetime_set.update(set(data['close_time']))
        instances = [Time(value=datetime) for datetime in datetime_set]
        silent_bulk_insert(session, instances)

    @staticmethod
    def _insert_data(session, symbol: str, data: pd.DataFrame) -> None:
        time_mapping = get_time_mapping(session=session)
        property_mapping = get_property_mapping(session=session)
        symbol_id = get_symbol_id(session=session, symbol=symbol)

        data['open_time_id'] = data["open_time"].map(time_mapping)
        data['close_time_id'] = data["close_time"].map(time_mapping)

        instances = []
        for _, row in data.iterrows():
            for property_name, property_id in property_mapping.items():
                instance = Data(
                    value=row[property_name],
                    symbol_id=symbol_id,
                    property_id=property_id,
                    open_time_id=row['open_time_id'],
                    close_time_id=row['close_time_id'],
                )
                instances.append(instance)
        silent_bulk_insert(session=session, instances=instances)

116  portfolio_management/database/retrieve.py  Normal file
@@ -0,0 +1,116 @@
from typing import List
from typing import Type
from typing import Optional

import pandas as pd
import xarray as xr

from sqlalchemy.orm import Session

from portfolio_management.database.bases import Time
from portfolio_management.database.bases import Base
from portfolio_management.database.bases import Data
from portfolio_management.database.bases import Symbol
from portfolio_management.database.bases import Property

from portfolio_management.database.utilities import session_scope
from portfolio_management.database.utilities import get_sessionmaker
from portfolio_management.database.utilities import remove_keys_from_dictionary


def get_symbol_id(session: Session, symbol: str) -> int:
    return session.query(Symbol.id).filter(Symbol.name == symbol).first()[0]


def get_symbol_list(session: Session) -> list:
    symbol_tuples = session.query(Symbol.name).all()
    return [symbol_tuple[0] for symbol_tuple in symbol_tuples]


def _get_mapping(session: Session, base: Type[Base], key_attr: str, value_attr: str):
    instances = list(session.query(base))
    return {getattr(instance, key_attr): getattr(instance, value_attr) for instance in instances}


def get_property_mapping(session: Session, id_to_name: bool = False):
    key_attr = 'id' if id_to_name else 'name'
    value_attr = 'name' if id_to_name else 'id'
    return _get_mapping(session, Property, key_attr=key_attr, value_attr=value_attr)


def get_symbol_mapping(session: Session, id_to_name: bool = False):
    key_attr = 'id' if id_to_name else 'name'
    value_attr = 'name' if id_to_name else 'id'
    return _get_mapping(session, Symbol, key_attr=key_attr, value_attr=value_attr)


def get_time_mapping(session: Session, id_to_value: bool = False):
    key_attr = 'id' if id_to_value else 'value'
    value_attr = 'value' if id_to_value else 'id'
    return _get_mapping(session, Time, key_attr=key_attr, value_attr=value_attr)


def get_dataframe(
    folder_path: str,
    database_name: str,
    symbol: str,
    echo: bool = False,
) -> pd.DataFrame:

    with session_scope(
        get_sessionmaker(folder_path, database_name, echo),
        expire_on_commit=False
    ) as session:

        symbol_id = get_symbol_id(session=session, symbol=symbol)
        property_mapping = get_property_mapping(session=session, id_to_name=True)
        instances = session.query(Data).filter(Data.symbol_id == symbol_id).all()

        records = {}
        for instance in instances:
            if instance.close_time_id not in records.keys():
                property_name = property_mapping[instance.property_id]
                dictionary = remove_keys_from_dictionary(
                    instance.__dict__,
                    ['_sa_instance_state', 'value', 'property_id', 'id', 'symbol_id']
                )
                dictionary[property_name] = instance.value
                records[instance.close_time_id] = dictionary
            else:
                property_name = property_mapping[instance.property_id]
                records[instance.close_time_id][property_name] = instance.value
        dataframe = pd.DataFrame.from_dict(records, orient='index')

        time_mapping = get_time_mapping(session=session, id_to_value=True)
        dataframe['open_time'] = dataframe["open_time_id"].map(time_mapping)
        dataframe['close_time'] = dataframe["close_time_id"].map(time_mapping)
        dataframe.drop(['open_time_id', 'close_time_id'], axis=1, inplace=True)
        # todo need to setup index column
        return dataframe


def get_dataset(
    folder_path: str,
    database_name: str,
    symbol_list: Optional[List[str]] = None,
    echo: bool = False,
) -> xr.Dataset:
    with session_scope(
        get_sessionmaker(folder_path, database_name, echo),
        expire_on_commit=False
    ) as session:
        symbol_list = symbol_list or get_symbol_list(session=session)  # noqa

        ds_list = []
        for symbol in symbol_list:
            df = get_dataframe(
                folder_path=folder_path,
                database_name=database_name,
                symbol=symbol,
            )
            ds_list.append(xr.Dataset.from_dataframe(df))
        ds = xr.concat(ds_list, dim='symbol')
        ds['symbol'] = symbol_list

        # ds['trades'] = ds['trades'] + epsilon
        return ds

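A minimal retrieval sketch, assuming the 'test' database built by the tests further down already exists under a local 'databases' folder:

    from portfolio_management.database.retrieve import get_dataframe, get_dataset

    # One symbol as a pandas DataFrame (columns are the property names plus open_time/close_time).
    df = get_dataframe(folder_path='databases', database_name='test', symbol='ETHBTC')

    # All stored symbols stacked along a 'symbol' dimension as an xarray Dataset.
    ds = get_dataset(folder_path='databases', database_name='test')
    print(ds['close'].sel(symbol='ETHBTC'))
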
79  portfolio_management/database/utilities.py  Normal file
@@ -0,0 +1,79 @@
from typing import Type
from typing import List
from pathlib import Path
from contextlib import contextmanager

from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import SQLAlchemyError

from portfolio_management.database.bases import Base


def get_path_database(folder_path: str, database_name: str) -> Path:
    return Path(folder_path).joinpath(database_name).with_suffix('.db')


def get_engine_url(folder_path: str, database_name: str) -> str:
    path_database = get_path_database(folder_path, database_name)
    return r'sqlite:///' + str(path_database)


def get_sessionmaker(
    folder_path: str,
    database_name: str,
    echo: bool = False,
    timeout: int = 60
) -> sessionmaker:
    engine_url = get_engine_url(folder_path, database_name)
    engine = create_engine(engine_url, echo=echo, connect_args={'timeout': timeout})
    return sessionmaker(bind=engine)


@contextmanager
def session_scope(session_maker: sessionmaker, **kwargs) -> None:
    """Provide a transactional scope around a series of operations."""
    session = session_maker(**kwargs)
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()


def silent_insert(session: Session, instance: Type[Base]) -> None:
    try:
        session.add(instance)
        session.commit()
    except SQLAlchemyError as e:
        print('silent_insert_exception', e)


def silent_bulk_insert(session: Session, instances: List[Type[Base]]) -> None:
    try:
        session.bulk_save_objects(instances)
        session.commit()
    except SQLAlchemyError as e:
        print('silent_bulk_insert_exception', e)


def find_instance(
    session: Session,
    base: Base,
    column_to_value: dict
) -> Type[Base]:
    criteria = (getattr(base, column).like(value) for column, value in column_to_value.items())
    return session.query(base).filter(*criteria).first()


def inner_join(a, b) -> list:
    return list(set(a) & set(b))


def remove_keys_from_dictionary(dictionary: dict, keys: list) -> dict:
    return {k: v for k, v in dictionary.items() if k not in keys}

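The session_scope context manager is the pattern used by every read and write in this subpackage: commit on success, rollback and re-raise on error, always close. A minimal sketch, assuming the 'test' database from the tests exists under a local 'databases' folder:

    from portfolio_management.database.utilities import get_sessionmaker, session_scope
    from portfolio_management.database.bases import Symbol

    Session = get_sessionmaker(folder_path='databases', database_name='test')
    with session_scope(Session) as session:
        print(session.query(Symbol.name).all())
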
1  portfolio_management/dataloader.py  Normal file
@@ -0,0 +1 @@
# todo maybe put all the dataloaders into a dataloader subpackage

6  portfolio_management/environment/__init__.py  Normal file
@@ -0,0 +1,6 @@
from gym.envs.registration import register

register(
    id='Portfolio-v0',
    entry_point='portfolio_management.environment:PortfolioEnv',
)

46  portfolio_management/environment/data.py  Normal file
@@ -0,0 +1,46 @@
from pathlib import Path
from typing import Union
from typing import Optional
import pandas as pd
import xarray as xr

from portfolio_management.utilities import get_unix_time


# todo add function to download the data


def get_dataframe(path: Union[str, Path]):
    columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']
    dataframe = pd.read_csv(path, names=columns, index_col='time')
    dataframe['time'] = dataframe.index
    return dataframe


def get_dataset(
    currencies,
    suffix: str,
    folder_path,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    epsilon: float = 10 ** -10,
):
    ds_list = []
    for currency in currencies:
        path = Path(folder_path) / (currency + suffix)
        df = get_dataframe(path)
        ds_list.append(xr.Dataset.from_dataframe(df))
    ds = xr.concat(ds_list, dim='currency', join='inner')
    ds['currency'] = currencies
    if start_date:
        ds = ds.where(get_unix_time(start_date) < ds.time, drop=True)
    if end_date:
        ds = ds.where(ds.time < get_unix_time(end_date), drop=True)

    ds['volume'] = ds['volume'] + epsilon
    ds['trades'] = ds['trades'] + epsilon
    return ds


def sample_dataset(dataset: xr.Dataset, sample_size: int, time: int):
    return dataset.sel(time=slice(None, time)).tail(time=sample_size)

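A usage sketch for these helpers, assuming Kraken OHLCVT CSV exports (e.g. ETHUSD_60.csv) are available under the given folder; the folder path and date range here are illustrative:

    from portfolio_management.environment.data import get_dataset, sample_dataset

    ds = get_dataset(
        currencies=['ETH', 'XBT'],
        suffix='USD_60.csv',            # hourly files, matching Market's f'USD_{time_step}.csv'
        folder_path='../Kraken_OHLCVT',
        start_date='20200101',          # format %Y%m%d, as expected by get_unix_time
        end_date='20210101',
    )
    window = sample_dataset(ds, sample_size=12, time=int(ds.time[100]))  # last 12 rows up to that time
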
116  portfolio_management/environment/environment.py  Normal file
@@ -0,0 +1,116 @@
from typing import Tuple
from typing import Optional

from gym import Env
from gym import spaces
from gym.utils import seeding

import numpy as np
from scipy.special import softmax

from portfolio_management.environment.market import Market
from portfolio_management.environment.portfolio import Portfolio
from portfolio_management.utilities import rate_calculator
from portfolio_management.utilities import get_str_time


DEFAULT_PRINCIPAL_RANGE = [10, 1000]
DEFAULT_TIMESTEP_TO_STEP = {60: 12, 15: 4}
DEFAULT_CURRENCIES = ['ETH', 'XBT', 'USDT', 'XRP', 'XDG']


class PortfolioEnv(Env):  # noqa
    def __init__(
        self,
        currencies: Optional[list] = None,
        num_steps: int = 100,
        fees: float = 0.002,
        seed: Optional[int] = None,
        chronologically: bool = True,
        folder_path: str = '..//Kraken_OHLCVT',
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        config: Optional[dict] = None,
        timestep_per_step: Optional[int] = 1,
        principal_range: Optional[list] = None,
    ):

        self.seed(seed)
        self.current_step = None
        self.num_steps = num_steps
        self.timestep_per_step = timestep_per_step
        self.config = config or DEFAULT_TIMESTEP_TO_STEP
        self.currencies = currencies or DEFAULT_CURRENCIES
        self.principal_range = principal_range or DEFAULT_PRINCIPAL_RANGE

        self.portfolio = Portfolio(
            self.currencies,
            fees=fees,
            principal_range=self.principal_range,
        )

        self.market = Market(
            folder_path=folder_path,
            currencies=self.currencies,
            config=self.config,
            max_steps=num_steps,
            timestep_per_step=timestep_per_step,
            apply_log=True,
            chronologically=chronologically,
            start_date=start_date,
            end_date=end_date,
        )

        prod = sum(self.config.values())
        state = ['Open', 'High', 'Low', 'Close', 'Volume', 'Trades']
        self.action_space = spaces.Box(
            low=0,
            high=1,
            shape=(len(self.currencies),)
        )
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(len(self.currencies) * len(state) * prod + len(self.currencies) + 2,)
        )

    def seed(self, seed=None) -> list:
        self.np_random, seed = seeding.np_random(seed)  # noqa
        return [seed]

    def reset(self) -> list:
        self.current_step = 0
        portfolio_state = self.portfolio.reset()
        self.market.reset()
        market_state, _, _ = self.market.step()  # todo check should we count -1
        return np.concatenate((market_state, portfolio_state))

    def step(self, action: list) -> Tuple[list, float, bool, dict]:
        if sum(action) == 1:
            proportions = np.array(action)
        else:
            proportions = softmax(action)

        self.current_step += 1
        done = True if self.current_step >= self.num_steps else False
        market_state, open_, close = self.market.step()
        reward, _, portfolio_state = self.portfolio.step(proportions, open_, close)

        state = np.concatenate((market_state, portfolio_state))
        time = self.current_step * self.market.main_timestep * self.market.timestep_per_step / (60 * 24)
        rate = rate_calculator(self.portfolio.amount, self.portfolio.principal, time)

        info = {}
        for i, currency in enumerate(self.currencies):
            info[f'step:portfolio/{currency}'] = float(proportions[i])

        info['information/date'] = get_str_time(self.market.current_time)

        info['information/amount'] = self.portfolio.amount
        info['information/principal'] = self.portfolio.principal

        info['portfolio/monthly_interest'] = np.exp(1) ** (rate * 30.5) - 1
        info['portfolio/yearly_interest'] = np.exp(1) ** (rate * 365) - 1
        info['portfolio/current_interest'] = self.portfolio.amount / self.portfolio.principal - 1

        return state, reward, done, info

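A minimal interaction sketch that instantiates the environment class directly; it assumes Kraken OHLCVT CSVs such as ETHUSD_60.csv and ETHUSD_15.csv exist under folder_path:

    import numpy as np
    from portfolio_management.environment.environment import PortfolioEnv

    env = PortfolioEnv(currencies=['ETH', 'XBT'], num_steps=10, folder_path='../Kraken_OHLCVT')
    state = env.reset()
    done = False
    while not done:
        action = np.ones(len(env.currencies))        # does not sum to 1, so step() softmaxes it
        state, reward, done, info = env.step(action)
        print(info['information/date'], reward)
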
36  portfolio_management/environment/evaluation.py  Normal file
@@ -0,0 +1,36 @@
import gym
import numpy as np
from typing import Optional

from portfolio_management.utilities import get_str_time


def evaluate_hold(
    holding_values: list,
    num_episodes: int = 500,
    env_name: str = 'Portfolio-v0',
    env_kwargs: Optional[dict] = None,
):

    env_kwargs = env_kwargs or {}
    env = gym.make(env_name, **env_kwargs)

    score_history = []

    for episode in range(num_episodes):
        score = 0
        done = False
        episode_step = 0
        _ = env.reset()

        while not done:
            new_observation, reward, done, info = env.step(holding_values)
            score += reward
            episode_step += 1

        score_history.append(score)
        print(
            f'\rEpisode n°{episode} Steps: {episode_step} '
            f'\tScore: {score:.3f} \tMean: {np.mean(score_history):.3f}'
            f'\tTime: {get_str_time(env.market.current_time)}'
        )

93  portfolio_management/environment/market.py  Normal file
@@ -0,0 +1,93 @@
from math import ceil
from typing import Optional

import numpy as np
import xarray as xr

from portfolio_management.environment.data import get_dataset, sample_dataset


class Market:
    def __init__(
        self,
        folder_path: str,
        currencies: list,
        config: dict,
        max_steps: int,
        timestep_per_step: int = 1,
        apply_log: bool = True,
        chronologically: bool = True,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None
    ):
        assert timestep_per_step <= config[max(config.keys())], \
            'Impossible to do more timestep than the largest timestep\'s step'

        self.folder_path = folder_path
        self.currencies = currencies
        self.timestep_to_number = config
        self.max_steps = max_steps
        self.timestep_per_step = timestep_per_step
        self.apply_log = apply_log
        self.chronologically = chronologically
        self.end_date = end_date
        self.start_date = start_date

        self.datasets = {}
        self.main_sample = None

        for time_step, number in sorted(self.timestep_to_number.items()):
            suffix = f'USD_{time_step}.csv'
            dataset = get_dataset(
                currencies,
                suffix=suffix,
                folder_path=folder_path,
                start_date=start_date,
                end_date=end_date
            )
            self.datasets[time_step] = dataset

        self.main_timestep = max(self.timestep_to_number.keys())
        self.constraint = ceil(max([ts * n for ts, n in self.timestep_to_number.items()]) / self.main_timestep)
        self.current_time = self.datasets[self.main_timestep]['time'][self.constraint]

    def reset(self):
        if not self.chronologically:
            self.current_time = np.random.choice(self.datasets[self.main_timestep]['time'][self.constraint: -self.constraint])
        if self.current_time > self.datasets[self.main_timestep]['time'][-self.constraint]:
            self.current_time = self.datasets[self.main_timestep]['time'][self.constraint]
            print('restart from zero')

    def step(self):
        raw_observation = []
        for time_step, number in sorted(self.timestep_to_number.items()):
            sample = sample_dataset(self.datasets[time_step], number, self.current_time)
            raw_observation.append(self._flatten(sample))
            if time_step == self.main_timestep:
                self.main_sample = sample

        self.current_time += self.main_timestep * self.timestep_per_step * 60
        open = np.array(self.main_sample.isel(time=-self.timestep_per_step)['open'])
        close = np.array(self.main_sample.isel(time=-1)['close'])
        observation = np.concatenate(raw_observation, axis=None)

        if self.apply_log:
            observation = np.log(observation)

        return observation, open, close

    @staticmethod
    def _flatten(dataset: xr.Dataset) -> np.array:
        return np.array(dataset.to_array()).flatten()

    def __repr__(self):
        return f'<{self.__class__.__name__}(' \
               f'folder_path={self.folder_path!r},' \
               f'currencies={self.currencies!r},' \
               f'config={self.timestep_to_number!r},' \
               f'max_steps={self.max_steps!r},' \
               f'timestep_per_step={self.timestep_per_step!r}, ' \
               f'apply_log={self.apply_log!r}, ' \
               f'chronologically={self.chronologically!r}, ' \
               f'start_date={self.start_date!r}, ' \
               f'end_date={self.end_date!r})>'

71  portfolio_management/environment/portfolio.py  Normal file
@@ -0,0 +1,71 @@
from typing import Union
from typing import List
import numpy as np

from portfolio_management.utilities import loguniform


class Portfolio:
    def __init__(
        self,
        currencies: list,
        fees: float,
        principal_range: List[float]
    ):
        self.amount = None
        self.principal = None

        self.proportions = None

        self.fees = fees
        self.currencies = currencies
        self.principal_range = principal_range

    def reset(self):
        self.proportions = None
        self.amount = self.principal = loguniform(*self.principal_range)
        return self.state

    def step(
        self,
        new_proportions: Union[list, np.array],
        open: Union[list, np.array],
        close: Union[list, np.array],
    ):
        if self.proportions is None:
            fees = 0
        else:
            fees = np.sum(np.abs(np.array(new_proportions) - self.proportions) * self.amount * self.fees)

        self.proportions = np.array(new_proportions)
        values = self.proportions * self.amount
        growth = np.array(close) / np.array(open)
        new_values = values * growth
        new_amount = np.sum(new_values)

        reward = (new_amount - self.amount - fees) / self.amount * 100  # todo maybe use principal if not stable
        self.amount = new_amount - fees

        return reward, self.amount, self.state

    @property
    def state(self) -> np.array:
        proportions = self.proportions if self.proportions is not None else np.zeros(shape=len(self.currencies))
        state = np.concatenate((
            [np.log(self.amount)],
            [np.log(self.principal)],
            proportions,
        ))
        return state

    def __repr__(self):
        return f'<{self.__class__.__name__}(' \
               f'currencies={self.currencies!r},' \
               f'fees={self.fees}, ' \
               f'principal_range={self.principal_range})>'

    def __str__(self):
        return f'<{self.__class__.__name__} ' \
               f'amount={self.amount}, ' \
               f'principal={self.principal}, ' \
               f'proportions={self.proportions})>'

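A small worked example of the rebalancing arithmetic in Portfolio.step: growth is close/open per currency, the new amount is the proportion-weighted sum, and fees are charged on the change in proportions (zero on the very first step, since proportions start as None):

    import numpy as np
    from portfolio_management.environment.portfolio import Portfolio

    portfolio = Portfolio(currencies=['ETH', 'XBT'], fees=0.002, principal_range=[10, 1000])
    portfolio.reset()                                            # amount = principal ~ loguniform(10, 1000)

    open_, close = np.array([2.0, 1.0]), np.array([2.2, 1.0])   # ETH gains 10%, XBT flat
    reward, amount, state = portfolio.step([0.5, 0.5], open_, close)
    # growth = [1.1, 1.0]; with equal weights the amount grows by ~5%, so reward ~ 5.0
    print(reward, amount)
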
19  portfolio_management/environment/utilities.py  Normal file
@@ -0,0 +1,19 @@
import datetime
import numpy as np


def get_unix_time(date: str):
    time = datetime.datetime.strptime(date, '%Y%m%d')
    return int((time - datetime.datetime(1970, 1, 1)).total_seconds())


def get_str_time(unix_time):
    return datetime.datetime.fromtimestamp(int(unix_time)).strftime('%Y%m%d')


def loguniform(low, high):
    return np.exp(np.random.uniform(np.log(low), np.log(high)))


def rate_calculator(amount, principal, time):
    return np.log(amount / principal) / time

24  portfolio_management/utilities.py  Normal file
@@ -0,0 +1,24 @@
import datetime
import numpy as np
from pathlib import Path


def get_unix_time(date: str):
    time = datetime.datetime.strptime(date, '%Y%m%d')
    return int((time - datetime.datetime(1970, 1, 1)).total_seconds())


def get_str_time(unix_time):
    return datetime.datetime.fromtimestamp(int(unix_time)).strftime('%Y%m%d')


def loguniform(low, high):
    return np.exp(np.random.uniform(np.log(low), np.log(high)))


def rate_calculator(amount, principal, time):
    return np.log(amount / principal) / time


def create_folders(path: Path) -> None:
    path.mkdir(parents=True, exist_ok=True)

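A short numeric sketch of the two rate helpers, matching the interest entries computed in PortfolioEnv.step:

    import numpy as np
    from portfolio_management.utilities import loguniform, rate_calculator

    rate = rate_calculator(amount=110, principal=100, time=30)   # log(1.1) / 30 ≈ 0.00318 per day
    monthly_interest = np.exp(rate * 30.5) - 1                   # same formula as info['portfolio/monthly_interest']
    principal = loguniform(10, 1000)                             # log-uniform draw used by Portfolio.reset()
    print(rate, monthly_interest, principal)
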
10  requirements.txt  Normal file
@@ -0,0 +1,10 @@
setuptools~=52.0.0
pandas~=1.2.4
yaml~=0.2.5
pyyaml~=5.4.1
sqlalchemy~=1.4.15
xarray~=0.17.0
gym~=0.18.0
numpy~=1.20.2
scipy~=1.6.2
python-binance

3  setup.cfg  Normal file
@@ -0,0 +1,3 @@
[metadata]
name = portfolio_management
version = 0.0.1

0  tests/__init__.py  Normal file

0  tests/database/__init__.py  Normal file

25  tests/database/test_core.py  Normal file
@@ -0,0 +1,25 @@
from binance import Client
from portfolio_management.database.core import setup_database


def test_core():
    folder_path = 'D:\\Thomas\\Python\\gym-portfolio-2\\databases'
    database_name = 'test'

    symbol_list = ["ETHBTC", "BNBBTC"]
    interval = Client.KLINE_INTERVAL_30MINUTE
    start = "2017-11-12"
    end = "2017-11-14"

    setup_database(
        folder_path=folder_path,
        database_name=database_name,
        symbol_list=symbol_list,
        interval=interval,
        start=start,
        end=end,
    )


if __name__ == '__main__':
    test_core()

13  tests/database/test_initialization.py  Normal file
@@ -0,0 +1,13 @@
from portfolio_management.database.initialization import Initialization


def test_database_initialization():
    folder_path = 'D:\\Thomas\\Python\\gym-portfolio-2\\databases'
    symbol_list = ['ETH', 'BTC']

    initialization = Initialization(folder_path=folder_path, database_name='test')
    initialization.run(symbol_list=symbol_list)


if __name__ == '__main__':
    test_database_initialization()

28  tests/database/test_insert.py  Normal file
@@ -0,0 +1,28 @@
from binance import Client

from portfolio_management.api import get_kline_dataframe

from portfolio_management.database.initialization import Initialization
from portfolio_management.database.insert import Insert


def test_database_insert():
    folder_path = 'D:\\Thomas\\GitHub\\portfolio-management\\databases'
    database_name = 'test'

    symbol_list = ["ETHBTC"]
    interval = Client.KLINE_INTERVAL_30MINUTE
    start = "2017-11-12"
    end = "2017-11-14"

    initialization = Initialization(folder_path=folder_path, database_name=database_name)
    initialization.run(symbol_list=symbol_list, reset_tables=True)
    insert = Insert(folder_path=folder_path, database_name=database_name)

    for symbol in symbol_list:
        data = get_kline_dataframe(symbol=symbol, interval=interval, start=start, end=end)
        insert.run(symbol=symbol, data=data)


if __name__ == '__main__':
    test_database_insert()

42  tests/database/test_retrieve.py  Normal file
@@ -0,0 +1,42 @@
from binance import Client

from portfolio_management.api import get_kline_dataframe

from portfolio_management.database.initialization import Initialization
from portfolio_management.database.insert import Insert

from portfolio_management.database.retrieve import get_dataframe
from portfolio_management.database.retrieve import get_dataset


def test_database_retrieve():
    folder_path = 'D:\\Thomas\\Python\\gym-portfolio-2\\databases'
    database_name = 'test'

    symbol_list = ["ETHBTC", "BNBBTC"]
    interval = Client.KLINE_INTERVAL_30MINUTE
    start = "2017-11-12"
    end = "2017-11-14"

    initialization = Initialization(folder_path=folder_path, database_name=database_name)
    initialization.run(symbol_list=symbol_list, reset_tables=True)
    insert = Insert(folder_path=folder_path, database_name=database_name)

    for symbol in symbol_list:
        data = get_kline_dataframe(symbol=symbol, interval=interval, start=start, end=end)
        insert.run(symbol=symbol, data=data)

    dataframe = get_dataframe(
        folder_path=folder_path,
        database_name=database_name,
        symbol=symbol_list[0]
    )

    dataset = get_dataset(
        folder_path=folder_path,
        database_name=database_name,
    )


if __name__ == '__main__':
    test_database_retrieve()

14  tests/test_get_kline_dataframe.py  Normal file
@@ -0,0 +1,14 @@
from portfolio_management.api import get_kline_dataframe
from binance import Client


def test_get_kline_dataframe():
    symbol = "ETHBTC"
    interval = Client.KLINE_INTERVAL_30MINUTE
    start = "2017-11-12"
    end = "2017-11-14"
    df = get_kline_dataframe(symbol=symbol, interval=interval, start=start, end=end)


if __name__ == '__main__':
    test_get_kline_dataframe()