graph, ignores

Dobromir Popov 2025-02-02 01:06:39 +02:00
parent 08f26785ea
commit c7c7acdb26
2 changed files with 90 additions and 46 deletions

.gitignore

@@ -29,3 +29,5 @@ crypto/sol/logs/transation_details.json
 .env
 app_data.db
 crypto/sol/.vs/*
+crypto/brian/models/best/*
+crypto/brian/models/last/*

View File

@@ -15,6 +15,7 @@ import torch.optim as optim
 import numpy as np
 from collections import deque
 from datetime import datetime
+import matplotlib.pyplot as plt
 
 # --- Directories for saving models ---
 LAST_DIR = os.path.join("models", "last")
@@ -60,12 +61,12 @@ def maintain_checkpoint_directory(directory, max_files=10):
 
 def get_best_models(directory):
     """Return a list of (reward, filename) for files in the best folder.
-    Expecting filenames like: best_{reward:.4f}_epoch_{epoch}_{timestamp}.pt"""
+    Expected filename format: best_{reward:.4f}_epoch_{epoch}_{timestamp}.pt"""
     best_files = []
     for file in os.listdir(directory):
         parts = file.split("_")
         try:
-            # parts[1] should be reward
+            # parts[1] should be the reward
             r = float(parts[1])
             best_files.append((r, file))
         except Exception:
@@ -73,21 +74,18 @@ def get_best_models(directory):
     return best_files
 
 def save_checkpoint(model, epoch, reward, last_dir=LAST_DIR, best_dir=BEST_DIR):
-    """Save the model state always to the last_dir and conditionally to best_dir if reward is high enough."""
+    """Save the model state at each epoch to last_dir and, conditionally, to best_dir."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    # last_filename = f"model_last_epoch_{epoch}_{timestamp}.pt"
-    last_filename = f"model_last_epoch_{epoch}.pt"
+    last_filename = f"model_last_epoch_{epoch}_{timestamp}.pt"
     last_path = os.path.join(last_dir, last_filename)
     torch.save({
         "epoch": epoch,
         "reward": reward,
         "model_state_dict": model.state_dict()
     }, last_path)
-    # Keep only last 10 models in last_dir.
+    # Maintain only last 10 checkpoints
     maintain_checkpoint_directory(last_dir, max_files=10)
-    # Check the best folder - if fewer than 10, simply add;
-    # Otherwise, add only if reward is higher than the lowest reward in best.
     best_models = get_best_models(best_dir)
     add_to_best = False
     if len(best_models) < 10:
@@ -96,11 +94,9 @@ def save_checkpoint(model, epoch, reward, last_dir=LAST_DIR, best_dir=BEST_DIR):
         min_reward, min_file = min(best_models, key=lambda x: x[0])
         if reward > min_reward:
             add_to_best = True
-            # Remove the worst checkpoint.
             os.remove(os.path.join(best_dir, min_file))
     if add_to_best:
-        # best_filename = f"best_{reward:.4f}_epoch_{epoch}_{timestamp}.pt"
-        best_filename = f"best_epoch_{epoch}.pt"
+        best_filename = f"best_{reward:.4f}_epoch_{epoch}_{timestamp}.pt"
         best_path = os.path.join(best_dir, best_filename)
         torch.save({
             "epoch": epoch,
@@ -111,7 +107,7 @@ def save_checkpoint(model, epoch, reward, last_dir=LAST_DIR, best_dir=BEST_DIR):
     print(f"Saved checkpoint for epoch {epoch} with reward {reward:.4f}")
 
 def load_best_checkpoint(model, best_dir=BEST_DIR):
-    """Load the best checkpoint (with highest reward) from the best directory if available."""
+    """Load the best checkpoint (with highest reward) if available."""
     best_models = get_best_models(best_dir)
     if not best_models:
         return None
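For reference, a minimal standalone sketch (not part of this commit) of how the restored filename format round-trips through the split("_") parsing that get_best_models uses; make_best_filename and parse_reward are hypothetical helper names:

from datetime import datetime

def make_best_filename(reward: float, epoch: int) -> str:
    # Mirrors the restored format: best_{reward:.4f}_epoch_{epoch}_{timestamp}.pt
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"best_{reward:.4f}_epoch_{epoch}_{timestamp}.pt"

def parse_reward(filename: str) -> float:
    # get_best_models splits on "_" and reads parts[1] as the reward.
    return float(filename.split("_")[1])

name = make_best_filename(12.3456, 7)
print(name, parse_reward(name))  # best_12.3456_epoch_7_<timestamp>.pt 12.3456

Because the timestamp itself contains an underscore, only parts[1] is stable across the whole name, which is exactly the field get_best_models reads.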
@@ -157,7 +153,7 @@ class ReplayBuffer:
         return len(self.buffer)
 
 # -------------------------------------
-# A Simple Indicator and Feature Preparation Function
+# Indicator and Feature Preparation Function
 # -------------------------------------
 def compute_indicators(candle, additional_data):
     """
@@ -177,7 +173,7 @@ def compute_indicators(candle, additional_data):
     return np.array(features, dtype=np.float32)
 
 # -------------------------------------
-# RL Agent with Q-Learning Update and Epsilon-Greedy Exploration
+# RL Agent with Q-Learning and Epsilon-Greedy Exploration
 # -------------------------------------
 class ContinuousRLAgent:
     def __init__(self, model, optimizer, replay_buffer, batch_size=32, gamma=0.99):
@@ -189,7 +185,6 @@ class ContinuousRLAgent:
         self.gamma = gamma
 
     def act(self, state, epsilon=0.1):
-        # ε-greedy: choose random action with probability epsilon.
         if np.random.rand() < epsilon:
             return np.random.randint(0, 3)
         state_tensor = torch.from_numpy(np.array(state, dtype=np.float32)).unsqueeze(0)
@@ -199,12 +194,10 @@
         return action
 
     def train_step(self):
-        # Only train if we have enough samples.
         if len(self.replay_buffer) < self.batch_size:
             return
 
         batch = self.replay_buffer.sample(self.batch_size)
-        # Unpack the batch; each experience is (state, action, reward, next_state, done)
        states, actions, rewards, next_states, dones = zip(*batch)
        states_tensor = torch.from_numpy(np.array(states, dtype=np.float32))
        actions_tensor = torch.tensor(actions, dtype=torch.int64)
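The loss and target computation sits below this hunk and is not shown; a standard DQN-style update consistent with the tensors built above would look roughly like the sketch below (an assumption about the elided lines, not a quote of them; dones is assumed to be a float tensor):

import torch
import torch.nn as nn

def q_learning_loss(model, states, actions, rewards, next_states, dones, gamma=0.99):
    # Q(s, a) for the actions actually taken (actions: int64 tensor of shape [batch]).
    q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        # Bootstrapped target: r + gamma * max_a' Q(s', a'), zeroed out on terminal transitions.
        next_q = model(next_states).max(dim=1).values
        targets = rewards + gamma * next_q * (1.0 - dones)
    return nn.MSELoss()(q_values, targets)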
@@ -224,12 +217,12 @@ class ContinuousRLAgent:
         self.optimizer.step()
 
 # -------------------------------------
-# Historical Data Fetching Function
+# Historical Data Fetching Functions
 # -------------------------------------
 async def fetch_historical_data(exchange, symbol, timeframe, since, end_time, batch_size=500):
     """
-    Fetch historical OHLCV data for the given symbol and timeframe.
-    "since" and "end_time" are in milliseconds.
+    Fetch historical OHLCV data for a given symbol and timeframe.
+    "since" and "end_time" are given in milliseconds.
     """
     candles = []
     since_ms = since
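The pagination loop body is outside this hunk; the usual ccxt pattern for the millisecond since/end_time contract described in the docstring is sketched below (assumed pattern and hypothetical helper name, not the file's exact code):

async def fetch_ohlcv_range(exchange, symbol, timeframe, since_ms, end_ms, batch_size=500):
    candles = []
    while since_ms < end_ms:
        batch = await exchange.fetch_ohlcv(symbol, timeframe, since=since_ms, limit=batch_size)
        if not batch:
            break
        candles.extend(batch)
        # Step past the last returned timestamp so the same candle is not fetched twice.
        since_ms = batch[-1][0] + 1
    return [row for row in candles if row[0] <= end_ms]

The file evidently converts the raw [timestamp, open, high, low, close, volume] rows into dicts keyed by 'open', 'close', etc., since later code indexes candles that way; that conversion step is omitted here.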
@@ -274,17 +267,20 @@ async def get_cached_or_fetch_data(exchange, symbol, timeframe, since, end_time,
     return candles
 
 # -------------------------------------
-# Backtest Environment Class Definition
+# Backtest Environment with Trade History Recording
 # -------------------------------------
 class BacktestEnvironment:
     def __init__(self, candles):
         self.candles = candles
         self.current_index = 0
-        self.position = None  # Holds an open position, if any
+        self.position = None  # Active position: dict with 'entry_price' and 'entry_index'
+        self.trade_history = []  # List of closed trades
 
-    def reset(self):
+    def reset(self, clear_trade_history=True):
         self.current_index = 0
         self.position = None
+        if clear_trade_history:
+            self.trade_history = []
         return self.get_state(self.current_index)
 
     def get_state(self, index):
@@ -298,9 +294,9 @@
 
     def step(self, action):
         """
-        Simulate a trading step.
-        - If not in a position and action is BUY (2), enter a long position at the next candle's open.
-        - If in a position and action is SELL (0), close the position at the next candle's open.
+        Simulate a trading step:
+          - If not in a position and action is BUY (2), record an entry at next candle's open.
+          - If in a position and action is SELL (0), record an exit at next candle's open and compute PnL.
         Returns: (current_state, reward, next_state, done)
         """
         if self.current_index >= len(self.candles) - 1:
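A concrete illustration of the fill convention in the revised docstring, using hypothetical prices (orders fill at the next candle's open):

# Hypothetical prices; orders fill at the NEXT candle's open, as in step().
candles = [{'open': 100.0}, {'open': 101.5}, {'open': 103.0}, {'open': 102.0}]
# BUY (action 2) emitted at index 0 -> entry_price = candles[1]['open'] = 101.5
# SELL (action 0) emitted at index 1 -> exit_price = candles[2]['open'] = 103.0
entry_price = candles[1]['open']
exit_price = candles[2]['open']
pnl = exit_price - entry_price  # 1.5: this is the step reward and the trade's recorded 'pnl'
print(pnl)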
@@ -314,31 +310,79 @@
         reward = 0.0
 
         # Action mapping: 0 -> SELL, 1 -> HOLD, 2 -> BUY.
-        # If not in a position:
         if self.position is None:
-            if action == 2:  # BUY signal:
+            if action == 2:  # BUY signal: enter position at next candle's open.
                 entry_price = next_candle['open']
                 self.position = {'entry_price': entry_price, 'entry_index': self.current_index}
         else:
-            if action == 0:  # SELL signal:
-                sell_price = next_candle['open']
-                reward = sell_price - self.position['entry_price']
+            if action == 0:  # SELL signal: exit position at next candle's open.
+                exit_price = next_candle['open']
+                reward = exit_price - self.position['entry_price']
+                trade = {
+                    'entry_index': self.position['entry_index'],
+                    'entry_price': self.position['entry_price'],
+                    'exit_index': next_index,
+                    'exit_price': exit_price,
+                    'pnl': reward
+                }
+                self.trade_history.append(trade)
                 self.position = None
         self.current_index = next_index
         done = (self.current_index >= len(self.candles) - 1)
         return current_state, reward, next_state, done
 
+# -------------------------------------
+# Plot Trading Chart with Buy/Sell Markers and PnL Annotations
+# -------------------------------------
+def plot_trade_history(candles, trade_history):
+    # Extract close price series from candles.
+    close_prices = [candle['close'] for candle in candles]
+    x = list(range(len(close_prices)))
+    plt.figure(figsize=(12, 6))
+    plt.plot(x, close_prices, label="Close Price", color="black", linewidth=1)
+    # Plot markers only once (avoid duplicate labels)
+    buy_plotted = False
+    sell_plotted = False
+    for trade in trade_history:
+        entry_idx = trade["entry_index"]
+        exit_idx = trade["exit_index"]
+        entry_price = trade["entry_price"]
+        exit_price = trade["exit_price"]
+        pnl = trade["pnl"]
+        if not buy_plotted:
+            plt.plot(entry_idx, entry_price, marker="^", color="green", markersize=10, label="BUY")
+            buy_plotted = True
+        else:
+            plt.plot(entry_idx, entry_price, marker="^", color="green", markersize=10)
+        if not sell_plotted:
+            plt.plot(exit_idx, exit_price, marker="v", color="red", markersize=10, label="SELL")
+            sell_plotted = True
+        else:
+            plt.plot(exit_idx, exit_price, marker="v", color="red", markersize=10)
+        plt.text(exit_idx, exit_price, f"{pnl:+.2f}", color="blue", fontsize=8)
+    plt.title("Trade History with PnL After Order Close")
+    plt.xlabel("Candle Index")
+    plt.ylabel("Price")
+    plt.legend()
+    plt.grid(True)
+    plt.show()
+
 # -------------------------------------
 # Training Loop Over Historical Data (Backtest)
 # -------------------------------------
 def train_on_historical_data(env, rl_agent, num_epochs=10, epsilon=0.1):
     """
-    For each epoch, run through the entire historical episode.
-    At each step, pick an action (using ε-greedy), simulate a trade, store the experience,
-    and update the model. Then log the cumulative reward and save checkpoints.
+    For each epoch, run through the historical episode.
+    At each step, select an action (using ε-greedy), simulate a trade,
+    store the experience, and update the network.
+    After the epoch, log the total reward and save checkpoints.
     """
     for epoch in range(1, num_epochs + 1):
-        state = env.reset()
+        state = env.reset()  # clear trade history each epoch
         done = False
         total_reward = 0.0
         steps = 0
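A quick synthetic smoke test for the new plotting helper (assumes plot_trade_history from this file is in scope; the dict keys match those produced by BacktestEnvironment):

import math

candles = [{'close': 100.0 + 5.0 * math.sin(i / 10.0)} for i in range(200)]
trade_history = [
    {'entry_index': 20, 'entry_price': 101.0, 'exit_index': 45, 'exit_price': 104.2, 'pnl': 3.2},
    {'entry_index': 90, 'entry_price': 97.5, 'exit_index': 130, 'exit_price': 96.1, 'pnl': -1.4},
]
plot_trade_history(candles, trade_history)  # opens a matplotlib window with BUY/SELL markers and PnL labels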
@@ -348,20 +392,18 @@
             state, reward, next_state, done = env.step(action)
             if next_state is None:
                 next_state = np.zeros_like(prev_state)
-            # Save the experience (state, action, reward, next_state, done)
             rl_agent.replay_buffer.add((prev_state, action, reward, next_state, done))
             rl_agent.train_step()
             total_reward += reward
             steps += 1
         print(f"Epoch {epoch}/{num_epochs} completed, total reward: {total_reward:.4f} over {steps} steps.")
-        # Save a checkpoint after the epoch.
         save_checkpoint(rl_agent.model, epoch, total_reward, LAST_DIR, BEST_DIR)
 
 # -------------------------------------
-# Main Asynchronous Function for Backtest Training
+# Main Asynchronous Function for Backtest Training and Charting
 # -------------------------------------
 async def main_backtest():
-    # Define symbol, timeframe, and historical period.
+    # Define symbol, timeframe, and period.
     symbol = 'BTC/USDT'
     timeframe = '1m'
     now = int(time.time() * 1000)
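The actual backtest window is set outside this hunk; with now in milliseconds, a period is typically derived as below (illustrative values only, not the file's):

import time

now = int(time.time() * 1000)        # current time in ms, as in main_backtest
one_day_ms = 24 * 60 * 60 * 1000     # 86,400,000 ms
since = now - one_day_ms             # e.g. the last 24 hours of 1m candles (~1440 rows)
end_time = now
print(since, end_time)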
@@ -386,10 +428,7 @@
         await exchange.close()
         return
 
-    # Save updated cache.
     save_candles_cache(CACHE_FILE, candles)
-
-    # Initialize backtest environment.
     env = BacktestEnvironment(candles)
 
     # Model dimensions: 5 (OHLCV) + 3 (sentiment) = 8.
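A sketch of what an 8-dimensional state consistent with this comment could look like (the real compute_indicators may differ; the sentiment keys here are hypothetical):

import numpy as np

def example_state(candle, sentiment):
    # 5 OHLCV values plus 3 sentiment values -> 8 features, float32 like compute_indicators.
    features = [candle['open'], candle['high'], candle['low'], candle['close'], candle['volume']]
    features += [sentiment['positive'], sentiment['neutral'], sentiment['negative']]  # hypothetical keys
    return np.array(features, dtype=np.float32)

state = example_state(
    {'open': 100.0, 'high': 101.0, 'low': 99.5, 'close': 100.5, 'volume': 12.3},
    {'positive': 0.2, 'neutral': 0.7, 'negative': 0.1},
)
print(state.shape)  # (8,)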
@@ -402,15 +441,15 @@
     replay_buffer = ReplayBuffer(capacity=10000)
     rl_agent = ContinuousRLAgent(model, optimizer, replay_buffer, batch_size=32, gamma=0.99)
 
-    # At training start, try loading a best checkpoint (if available).
+    # At training start, try loading the best checkpoint if available.
     load_best_checkpoint(model, BEST_DIR)
 
-    # Run training over historical data.
-    num_epochs = 10  # Change as needed.
+    # Run training (backtesting) over historical data.
+    num_epochs = 10  # adjust as needed.
     train_on_historical_data(env, rl_agent, num_epochs=num_epochs, epsilon=0.1)
 
-    # Final simulation (without exploration) to check cumulative profit.
-    state = env.reset()
+    # Final simulation (without exploration) to log trade history.
+    state = env.reset(clear_trade_history=True)
     done = False
     cumulative_reward = 0.0
     while not done:
@@ -420,6 +459,9 @@
         state = next_state
     print("Final backtest simulation cumulative profit:", cumulative_reward)
 
+    # Draw the chart: plot close price with BUY/SELL markers and PnL annotations.
+    plot_trade_history(candles, env.trade_history)
+
     await exchange.close()
 
 if __name__ == "__main__":