RL training

Dobromir Popov
2025-03-31 03:31:54 +03:00
parent 1610d5bd49
commit 4eac14022c
9 changed files with 1492 additions and 247 deletions


@@ -475,7 +475,7 @@ class CNNModelPyTorch:
diversity_weight * diversity_loss)
return total_loss, action_loss, price_loss
def train_epoch(self, X_train, y_train, future_prices, batch_size):
"""Train the model for one epoch with focus on short-term pattern recognition"""
self.model.train()
@@ -919,13 +919,7 @@ class CNNModelPyTorch:
logger.info(f"Backup saved to {backup_path}")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
"""Load model weights from file"""
if not os.path.exists(f"{filepath}.pt"):
logger.error(f"Model file {filepath}.pt not found")
return False
@@ -938,27 +932,20 @@ class CNNModelPyTorch:
self.window_size = model_state['window_size']
self.num_features = model_state['num_features']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
self.timeframes = model_state.get('timeframes', ["1m"])
# Load model state dict
self.load_state_dict(model_state['model_state_dict'])
# Load optimizer state if available
if 'optimizer_state_dict' in model_state:
self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
# Load trading configuration if available
if 'confidence_threshold' in model_state:
self.confidence_threshold = model_state['confidence_threshold']
if 'max_consecutive_same_action' in model_state:
self.max_consecutive_same_action = model_state['max_consecutive_same_action']
if 'action_counts' in model_state:
self.action_counts = model_state['action_counts']
if 'last_actions' in model_state:
self.last_actions = model_state['last_actions']
# Rebuild the model
self.build_model()
# Load the model state
self.model.load_state_dict(model_state['model_state_dict'])
self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
self.history = model_state['history']
logger.info(f"Model loaded from {filepath}.pt")
# Log model version information if available
if 'model_version' in model_state:
@@ -973,7 +960,7 @@ class CNNModelPyTorch:
def plot_training_history(self, metrics_file="NN/models/saved/training_metrics.json"):
"""
Generate comprehensive performance visualization plots from training history
Plot training history from saved metrics.
Args:
metrics_file: Path to the saved metrics JSON file
@@ -983,253 +970,72 @@ class CNNModelPyTorch:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
import numpy as np
import os
# Create directory for plots
plots_dir = "NN/models/saved/performance_plots"
os.makedirs(plots_dir, exist_ok=True)
# Load metrics
with open(metrics_file, 'r') as f:
metrics = json.load(f)
epochs = metrics["epoch"]
# Set default style for better visualization
plt.style.use('seaborn-darkgrid')
# Create plots directory
plots_dir = os.path.join(os.path.dirname(metrics_file), 'plots')
os.makedirs(plots_dir, exist_ok=True)
# Convert timestamps to datetime objects
timestamps = [datetime.fromisoformat(ts) for ts in metrics['timestamps']]
# 1. Plot Loss and Accuracy
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
# Loss plot
ax1.plot(epochs, metrics["train_loss"], 'b-', label='Training Loss')
ax1.plot(epochs, metrics["val_loss"], 'r-', label='Validation Loss')
ax1.set_title('Model Loss over Epochs', fontsize=16)
ax1.set_ylabel('Loss', fontsize=14)
ax1.legend(loc='upper right', fontsize=12)
ax1.plot(timestamps, metrics['train_loss'], 'b-', label='Training Loss')
ax1.plot(timestamps, metrics['val_loss'], 'r-', label='Validation Loss')
ax1.set_title('Model Loss Over Time')
ax1.set_ylabel('Loss')
ax1.legend()
ax1.grid(True)
# Accuracy plot
ax2.plot(epochs, metrics["train_acc"], 'b-', label='Training Accuracy')
ax2.plot(epochs, metrics["val_acc"], 'r-', label='Validation Accuracy')
ax2.set_title('Model Accuracy over Epochs', fontsize=16)
ax2.set_xlabel('Epoch', fontsize=14)
ax2.set_ylabel('Accuracy', fontsize=14)
ax2.legend(loc='lower right', fontsize=12)
ax2.plot(timestamps, metrics['train_acc'], 'g-', label='Training Accuracy')
ax2.plot(timestamps, metrics['val_acc'], 'm-', label='Validation Accuracy')
ax2.set_title('Model Accuracy Over Time')
ax2.set_ylabel('Accuracy')
ax2.legend()
ax2.grid(True)
# Format x-axis
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
plt.xticks(rotation=45)
# Save the plot
plt.tight_layout()
plt.savefig(f"{plots_dir}/loss_accuracy.png", dpi=300)
plt.savefig(os.path.join(plots_dir, 'loss_accuracy.png'))
plt.close()
# 2. Plot PnL and Win Rate
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
# PnL plot
ax1.plot(epochs, metrics["train_pnl"], 'g-', label='Training PnL')
ax1.plot(epochs, metrics["val_pnl"], 'm-', label='Validation PnL')
ax1.set_title('Trading Profit and Loss over Epochs', fontsize=16)
ax1.set_ylabel('PnL', fontsize=14)
ax1.legend(loc='upper left', fontsize=12)
ax1.plot(timestamps, metrics['train_pnl'], 'g-', label='Training PnL')
ax1.plot(timestamps, metrics['val_pnl'], 'r-', label='Validation PnL')
ax1.set_title('PnL Over Time')
ax1.set_ylabel('PnL')
ax1.legend()
ax1.grid(True)
# Win Rate plot
ax2.plot(epochs, metrics["train_win_rate"], 'g-', label='Training Win Rate')
ax2.plot(epochs, metrics["val_win_rate"], 'm-', label='Validation Win Rate')
ax2.set_title('Trading Win Rate over Epochs', fontsize=16)
ax2.set_xlabel('Epoch', fontsize=14)
ax2.set_ylabel('Win Rate', fontsize=14)
ax2.axhline(y=0.5, color='r', linestyle='--', label='50% Threshold')
ax2.legend(loc='lower right', fontsize=12)
ax2.plot(timestamps, metrics['train_win_rate'], 'b-', label='Training Win Rate')
ax2.plot(timestamps, metrics['val_win_rate'], 'm-', label='Validation Win Rate')
ax2.set_title('Win Rate Over Time')
ax2.set_ylabel('Win Rate')
ax2.legend()
ax2.grid(True)
# Format x-axis
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
plt.xticks(rotation=45)
# Save the plot
plt.tight_layout()
plt.savefig(f"{plots_dir}/pnl_winrate.png", dpi=300)
plt.close()
# 3. Plot Signal Distribution over time
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
# Training Signal Distribution
buy_train = [epoch_dist["train"]["BUY"] for epoch_dist in metrics["signal_distribution"]]
sell_train = [epoch_dist["train"]["SELL"] for epoch_dist in metrics["signal_distribution"]]
hold_train = [epoch_dist["train"]["HOLD"] for epoch_dist in metrics["signal_distribution"]]
ax1.stackplot(epochs, buy_train, hold_train, sell_train,
labels=['BUY', 'HOLD', 'SELL'],
colors=['green', 'gray', 'red'], alpha=0.7)
ax1.set_title('Training Signal Distribution over Epochs', fontsize=16)
ax1.set_ylabel('Proportion', fontsize=14)
ax1.legend(loc='upper right', fontsize=12)
ax1.set_ylim(0, 1)
ax1.grid(True)
# Validation Signal Distribution
buy_val = [epoch_dist["val"]["BUY"] for epoch_dist in metrics["signal_distribution"]]
sell_val = [epoch_dist["val"]["SELL"] for epoch_dist in metrics["signal_distribution"]]
hold_val = [epoch_dist["val"]["HOLD"] for epoch_dist in metrics["signal_distribution"]]
ax2.stackplot(epochs, buy_val, hold_val, sell_val,
labels=['BUY', 'HOLD', 'SELL'],
colors=['green', 'gray', 'red'], alpha=0.7)
ax2.set_title('Validation Signal Distribution over Epochs', fontsize=16)
ax2.set_xlabel('Epoch', fontsize=14)
ax2.set_ylabel('Proportion', fontsize=14)
ax2.legend(loc='upper right', fontsize=12)
ax2.set_ylim(0, 1)
ax2.grid(True)
plt.tight_layout()
plt.savefig(f"{plots_dir}/signal_distribution.png", dpi=300)
plt.close()
# 4. Performance Correlation Matrix
fig, ax = plt.subplots(figsize=(10, 8))
# Extract key metrics for correlation
corr_data = {}
corr_data['Loss'] = metrics["train_loss"]
corr_data['Accuracy'] = metrics["train_acc"]
corr_data['PnL'] = metrics["train_pnl"]
corr_data['Win Rate'] = metrics["train_win_rate"]
corr_data['BUY %'] = buy_train
corr_data['SELL %'] = sell_train
corr_data['HOLD %'] = hold_train
# Convert to numpy array
corr_matrix = np.zeros((len(corr_data), len(corr_data)))
labels = list(corr_data.keys())
# Calculate correlation
for i, key1 in enumerate(labels):
for j, key2 in enumerate(labels):
if i == j:
corr_matrix[i, j] = 1.0
else:
corr = np.corrcoef(corr_data[key1], corr_data[key2])[0, 1]
corr_matrix[i, j] = corr
# Plot heatmap
im = ax.imshow(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)
# Add colorbar
cbar = fig.colorbar(im, ax=ax)
cbar.set_label('Correlation', rotation=270, labelpad=20, fontsize=14)
# Add ticks and labels
ax.set_xticks(np.arange(len(labels)))
ax.set_yticks(np.arange(len(labels)))
ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=12)
ax.set_yticklabels(labels, fontsize=12)
# Add text annotations
for i in range(len(labels)):
for j in range(len(labels)):
text = ax.text(j, i, f"{corr_matrix[i, j]:.2f}",
ha="center", va="center", color="black" if abs(corr_matrix[i, j]) < 0.7 else "white")
ax.set_title('Correlation Matrix of Performance Metrics', fontsize=16)
plt.tight_layout()
plt.savefig(f"{plots_dir}/correlation_matrix.png", dpi=300)
plt.close()
# 5. Combined Performance Dashboard
fig = plt.figure(figsize=(16, 20))
# Define grid layout
gs = fig.add_gridspec(4, 2, hspace=0.4, wspace=0.3)
# Plot 1: Loss curves
ax1 = fig.add_subplot(gs[0, 0])
ax1.plot(epochs, metrics["train_loss"], 'b-', label='Training')
ax1.plot(epochs, metrics["val_loss"], 'r-', label='Validation')
ax1.set_title('Loss', fontsize=14)
ax1.set_xlabel('Epoch', fontsize=12)
ax1.set_ylabel('Loss', fontsize=12)
ax1.legend(fontsize=10)
ax1.grid(True)
# Plot 2: Accuracy
ax2 = fig.add_subplot(gs[0, 1])
ax2.plot(epochs, metrics["train_acc"], 'b-', label='Training')
ax2.plot(epochs, metrics["val_acc"], 'r-', label='Validation')
ax2.set_title('Accuracy', fontsize=14)
ax2.set_xlabel('Epoch', fontsize=12)
ax2.set_ylabel('Accuracy', fontsize=12)
ax2.legend(fontsize=10)
ax2.grid(True)
# Plot 3: PnL
ax3 = fig.add_subplot(gs[1, 0])
ax3.plot(epochs, metrics["train_pnl"], 'g-', label='Training')
ax3.plot(epochs, metrics["val_pnl"], 'm-', label='Validation')
ax3.set_title('Profit and Loss', fontsize=14)
ax3.set_xlabel('Epoch', fontsize=12)
ax3.set_ylabel('PnL', fontsize=12)
ax3.legend(fontsize=10)
ax3.grid(True)
# Plot 4: Win Rate
ax4 = fig.add_subplot(gs[1, 1])
ax4.plot(epochs, metrics["train_win_rate"], 'g-', label='Training')
ax4.plot(epochs, metrics["val_win_rate"], 'm-', label='Validation')
ax4.axhline(y=0.5, color='r', linestyle='--', label='50% Threshold')
ax4.set_title('Win Rate', fontsize=14)
ax4.set_xlabel('Epoch', fontsize=12)
ax4.set_ylabel('Win Rate', fontsize=12)
ax4.legend(fontsize=10)
ax4.grid(True)
# Plot 5: Training Signal Distribution
ax5 = fig.add_subplot(gs[2, 0])
ax5.stackplot(epochs, buy_train, hold_train, sell_train,
labels=['BUY', 'HOLD', 'SELL'],
colors=['green', 'gray', 'red'], alpha=0.7)
ax5.set_title('Training Signal Distribution', fontsize=14)
ax5.set_xlabel('Epoch', fontsize=12)
ax5.set_ylabel('Proportion', fontsize=12)
ax5.legend(fontsize=10)
ax5.set_ylim(0, 1)
ax5.grid(True)
# Plot 6: Validation Signal Distribution
ax6 = fig.add_subplot(gs[2, 1])
ax6.stackplot(epochs, buy_val, hold_val, sell_val,
labels=['BUY', 'HOLD', 'SELL'],
colors=['green', 'gray', 'red'], alpha=0.7)
ax6.set_title('Validation Signal Distribution', fontsize=14)
ax6.set_xlabel('Epoch', fontsize=12)
ax6.set_ylabel('Proportion', fontsize=12)
ax6.legend(fontsize=10)
ax6.set_ylim(0, 1)
ax6.grid(True)
# Plot 7: Performance Correlation Heatmap
ax7 = fig.add_subplot(gs[3, :])
im = ax7.imshow(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)
cbar = fig.colorbar(im, ax=ax7, fraction=0.025, pad=0.04)
cbar.set_label('Correlation', rotation=270, labelpad=20, fontsize=12)
# Add ticks and labels
ax7.set_xticks(np.arange(len(labels)))
ax7.set_yticks(np.arange(len(labels)))
ax7.set_xticklabels(labels, rotation=45, ha="right", fontsize=10)
ax7.set_yticklabels(labels, fontsize=10)
# Add text annotations
for i in range(len(labels)):
for j in range(len(labels)):
text = ax7.text(j, i, f"{corr_matrix[i, j]:.2f}",
ha="center", va="center", color="black" if abs(corr_matrix[i, j]) < 0.7 else "white")
ax7.set_title('Correlation Matrix of Performance Metrics', fontsize=14)
# Add main title
plt.suptitle('CNN Model Performance Dashboard', fontsize=20, y=0.98)
plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.savefig(f"{plots_dir}/performance_dashboard.png", dpi=300)
plt.savefig(os.path.join(plots_dir, 'pnl_winrate.png'))
plt.close()
print(f"Performance visualizations saved to {plots_dir}")
@@ -1239,7 +1045,7 @@ class CNNModelPyTorch:
import traceback
print(traceback.format_exc())
return False
def extract_hidden_features(self, X):
"""
Extract hidden features from the model - outputs from last dense layer before output.

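For orientation, here is a sketch of exercising the two reworked methods; the import path and constructor arguments are assumptions (neither is shown in these hunks):

```python
# Sketch only: import path and constructor signature are assumed, not shown in this diff.
from NN.models.cnn_model_pytorch import CNNModelPyTorch  # hypothetical module path

model = CNNModelPyTorch(window_size=20, num_features=5,
                        output_size=3, timeframes=["1m", "5m", "15m"])

# load() returns False (and logs an error) when "<filepath>.pt" is missing,
# and falls back to timeframes=["1m"] for checkpoints saved without that key.
if model.load("NN/models/saved/cnn_model_best"):
    # Writes loss_accuracy.png and pnl_winrate.png, indexed by timestamp,
    # into a plots/ directory next to the metrics file.
    model.plot_training_history("NN/models/saved/training_metrics.json")
```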
NN/models/dqn_agent.py (new file, 170 lines)

@@ -0,0 +1,170 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import random
from typing import Tuple, List
import os
import sys
# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from NN.models.simple_cnn import CNNModelPyTorch
class DQNAgent:
"""
Deep Q-Network agent for trading
Uses CNN model as the base network
"""
def __init__(self,
state_size: int,
action_size: int,
window_size: int,
num_features: int,
timeframes: List[str],
learning_rate: float = 0.001,
gamma: float = 0.99,
epsilon: float = 1.0,
epsilon_min: float = 0.01,
epsilon_decay: float = 0.995,
memory_size: int = 10000,
batch_size: int = 64,
target_update: int = 10):
self.state_size = state_size
self.action_size = action_size
self.window_size = window_size
self.num_features = num_features
self.timeframes = timeframes
self.learning_rate = learning_rate
self.gamma = gamma
self.epsilon = epsilon
self.epsilon_min = epsilon_min
self.epsilon_decay = epsilon_decay
self.memory_size = memory_size
self.batch_size = batch_size
self.target_update = target_update
# Device configuration
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Initialize networks
self.policy_net = CNNModelPyTorch(
window_size=window_size,
num_features=num_features,
output_size=action_size,
timeframes=timeframes
).to(self.device)
self.target_net = CNNModelPyTorch(
window_size=window_size,
num_features=num_features,
output_size=action_size,
timeframes=timeframes
).to(self.device)
self.target_net.load_state_dict(self.policy_net.state_dict())
# Initialize optimizer
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
# Initialize memory
self.memory = deque(maxlen=memory_size)
# Training metrics
self.update_count = 0
self.losses = []
def remember(self, state: np.ndarray, action: int, reward: float,
next_state: np.ndarray, done: bool):
"""Store experience in memory"""
self.memory.append((state, action, reward, next_state, done))
def act(self, state: np.ndarray) -> int:
"""Choose action using epsilon-greedy policy"""
if random.random() < self.epsilon:
return random.randrange(self.action_size)
with torch.no_grad():
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
action_probs, _ = self.policy_net(state)
return action_probs.argmax().item()
def replay(self) -> float:
"""Train on a batch of experiences"""
if len(self.memory) < self.batch_size:
return 0.0
# Sample batch
batch = random.sample(self.memory, self.batch_size)
states, actions, rewards, next_states, dones = zip(*batch)
# Convert to tensors and move to device
states = torch.FloatTensor(np.array(states)).to(self.device)
actions = torch.LongTensor(actions).to(self.device)
rewards = torch.FloatTensor(rewards).to(self.device)
next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
dones = torch.FloatTensor(dones).to(self.device)
# Get current Q values
current_q_values, _ = self.policy_net(states)
current_q_values = current_q_values.gather(1, actions.unsqueeze(1))
# Get next Q values from target network
with torch.no_grad():
next_q_values, _ = self.target_net(next_states)
next_q_values = next_q_values.max(1)[0]
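# TD target: reward + (1 - done) * gamma * max_a' Q_target(next_state, a'); the (1 - dones) factor zeroes the bootstrap term at episode ends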
target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
# Compute loss
loss = nn.MSELoss()(current_q_values.squeeze(), target_q_values)
# Optimize
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
# Update target network if needed
self.update_count += 1
if self.update_count % self.target_update == 0:
self.target_net.load_state_dict(self.policy_net.state_dict())
# Decay epsilon
self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
return loss.item()
def save(self, path: str):
"""Save model and agent state"""
os.makedirs(os.path.dirname(path), exist_ok=True)
# Save policy network
self.policy_net.save(f"{path}_policy")
# Save target network
self.target_net.save(f"{path}_target")
# Save agent state
state = {
'epsilon': self.epsilon,
'update_count': self.update_count,
'losses': self.losses,
'optimizer_state': self.optimizer.state_dict()
}
torch.save(state, f"{path}_agent_state.pt")
def load(self, path: str):
"""Load model and agent state"""
# Load policy network
self.policy_net.load(f"{path}_policy")
# Load target network
self.target_net.load(f"{path}_target")
# Load agent state
state = torch.load(f"{path}_agent_state.pt")
self.epsilon = state['epsilon']
self.update_count = state['update_count']
self.losses = state['losses']
self.optimizer.load_state_dict(state['optimizer_state'])

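A minimal usage sketch of the agent API above, with placeholder dimensions (matching those used later in NN/train_rl.py) and random transitions standing in for real market states:

```python
import numpy as np
from NN.models.dqn_agent import DQNAgent

window_size, num_features, timeframes = 20, 5, ["1m", "5m", "15m"]
agent = DQNAgent(state_size=window_size, action_size=3,
                 window_size=window_size, num_features=num_features,
                 timeframes=timeframes, batch_size=32)

# States are shaped (window_size, num_features * len(timeframes)); act() and
# replay() add the batch dimension before the CNN forward pass.
shape = (window_size, num_features * len(timeframes))
state = np.random.randn(*shape).astype(np.float32)
for _ in range(64):
    action = agent.act(state)  # epsilon-greedy; epsilon starts at 1.0
    next_state = np.random.randn(*shape).astype(np.float32)
    agent.remember(state, action, 0.0, next_state, False)
    state = next_state

loss = agent.replay()  # returns 0.0 until the buffer holds batch_size transitions
agent.save("NN/models/saved/dqn_demo")  # writes *_policy.pt, *_target.pt, *_agent_state.pt
```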
NN/models/simple_cnn.py (new file, 130 lines)

@@ -0,0 +1,130 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import logging
import torch.nn.functional as F
from typing import List, Tuple
# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CNNModelPyTorch(nn.Module):
"""
CNN model for trading signals
Simplified version for RL training
"""
def __init__(self, window_size: int, num_features: int, output_size: int, timeframes: List[str]):
super(CNNModelPyTorch, self).__init__()
self.window_size = window_size
self.num_features = num_features
self.output_size = output_size
self.timeframes = timeframes
# Device configuration
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger.info(f"Using device: {self.device}")
# Build model
self.build_model()
# Initialize optimizer and scheduler
self.optimizer = optim.Adam(self.parameters(), lr=0.001)
self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
self.optimizer, mode='max', factor=0.5, patience=5, verbose=True
)
# Move model to device
self.to(self.device)
def build_model(self):
"""Build the CNN architecture"""
# First Convolutional Layer
self.conv1 = nn.Conv1d(
in_channels=self.num_features * len(self.timeframes),
out_channels=32,
kernel_size=3,
padding=1
)
self.bn1 = nn.BatchNorm1d(32)
# Second Convolutional Layer
self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm1d(64)
# Third Convolutional Layer
self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm1d(128)
# Calculate size after convolutions
conv_out_size = self.window_size * 128
# Fully connected layers
self.fc1 = nn.Linear(conv_out_size, 512)
self.fc2 = nn.Linear(512, 256)
self.fc3 = nn.Linear(256, self.output_size)
# Additional output for value estimation
self.value_fc = nn.Linear(256, 1)
def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""Forward pass through the network"""
# Ensure input is on the correct device
x = x.to(self.device)
# Reshape input: [batch, window_size, features] -> [batch, channels, window_size]
batch_size = x.size(0)
x = x.permute(0, 2, 1)
# Convolutional layers
x = F.relu(self.bn1(self.conv1(x)))
x = F.relu(self.bn2(self.conv2(x)))
x = F.relu(self.bn3(self.conv3(x)))
# Flatten
x = x.view(batch_size, -1)
# Fully connected layers
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
# Split into advantage and value streams
advantage = self.fc3(x)
value = self.value_fc(x)
# Dueling combination: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
return q_values, value
def predict(self, X):
"""Make predictions"""
self.eval()
# Convert to tensor if not already
if not isinstance(X, torch.Tensor):
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
else:
X_tensor = X.to(self.device)
with torch.no_grad():
q_values, value = self(X_tensor)
q_values_np = q_values.cpu().numpy()
actions = np.argmax(q_values_np, axis=1)
return actions, q_values_np
def save(self, path: str):
"""Save model weights"""
os.makedirs(os.path.dirname(path), exist_ok=True)
torch.save(self.state_dict(), f"{path}.pt")
logger.info(f"Model saved to {path}.pt")
def load(self, path: str):
"""Load model weights"""
self.load_state_dict(torch.load(f"{path}.pt", map_location=self.device))
self.eval()
logger.info(f"Model loaded from {path}.pt")

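A short standalone sketch of the new model, using the same dimensions as the RL script below; the random batch is a placeholder:

```python
import numpy as np
import torch
from NN.models.simple_cnn import CNNModelPyTorch

model = CNNModelPyTorch(window_size=20, num_features=5,
                        output_size=3, timeframes=["1m", "5m", "15m"])

# Input is (batch, window_size, num_features * len(timeframes)); forward()
# permutes it to (batch, channels, window_size) for the Conv1d stack.
batch = np.random.randn(8, 20, 15).astype(np.float32)
q_values, value = model(torch.tensor(batch))
print(q_values.shape, value.shape)  # torch.Size([8, 3]) torch.Size([8, 1])

actions, q_np = model.predict(batch)  # argmax over the dueling Q-values
model.save("NN/models/saved/simple_cnn_demo")  # writes simple_cnn_demo.pt
```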
NN/train_rl.py (new file, 192 lines)

@@ -0,0 +1,192 @@
import torch
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import logging
import time
from datetime import datetime
import os
import sys
import pandas as pd
import gym
# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from NN.utils.data_interface import DataInterface
from NN.utils.trading_env import TradingEnvironment
from NN.models.dqn_agent import DQNAgent
from NN.utils.signal_interpreter import SignalInterpreter
# Configure logging
logger = logging.getLogger(__name__)
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('rl_training.log'),
logging.StreamHandler()
]
)
class RLTradingEnvironment(TradingEnvironment):
"""Extended trading environment that reshapes state for CNN"""
def __init__(self, data, window_size, num_features, num_timeframes, **kwargs):
# Set attributes before parent initialization
self.window_size = window_size
self.num_features = num_features
self.num_timeframes = num_timeframes
self.feature_dim = num_features * num_timeframes
# Initialize parent class
super().__init__(data=data, **kwargs)
# Update observation space for CNN
self.observation_space = gym.spaces.Box(
low=-np.inf,
high=np.inf,
shape=(self.window_size, self.feature_dim),
dtype=np.float32
)
def _get_observation(self):
"""Get current observation reshaped for CNN"""
# Get flattened observation from parent class
flat_obs = super()._get_observation()
# Extract features (exclude close price)
features = flat_obs[:-1] # Remove close price
# Calculate number of complete windows
n_windows = len(features) // self.feature_dim
if n_windows < self.window_size:
# Pad with zeros if not enough data
padding = np.zeros((self.window_size - n_windows, self.feature_dim))
reshaped = np.vstack([
padding,
features[-(n_windows * self.feature_dim):].reshape(n_windows, self.feature_dim)
])
else:
# Take the most recent window_size windows
start_idx = (n_windows - self.window_size) * self.feature_dim
reshaped = features[start_idx:].reshape(self.window_size, self.feature_dim)
return reshaped.astype(np.float32)
def train_rl():
"""
Train the RL model using the DQN agent
"""
# Initialize data interface with BTC/USDT and multiple timeframes
timeframes = ['1m', '5m', '15m']
window_size = 20
data_interface = DataInterface(symbol="BTC/USDT", timeframes=timeframes)
# Get training data
X_train, y_train, X_val, y_val, train_prices, val_prices = data_interface.prepare_training_data()
if X_train is None:
logger.error("Failed to get training data")
return
# Calculate feature dimensions
num_features = X_train.shape[2] # Number of features per timeframe
total_features = num_features * len(timeframes) # Total features across all timeframes
# Flatten features for environment
n_samples = X_train.shape[0]
flattened_features = X_train.reshape(n_samples, window_size, -1) # Reshape to (batch, window, features)
# Create DataFrame with features as separate columns
feature_columns = [f'feature_{i}' for i in range(flattened_features.shape[2])]
df = pd.DataFrame(flattened_features.reshape(n_samples, -1), columns=feature_columns * window_size)
df['close'] = train_prices
# Create environment
env = RLTradingEnvironment(
data=df,
window_size=window_size,
num_features=num_features,
num_timeframes=len(timeframes),
initial_balance=10000,
fee_rate=0.001,
max_steps=1000
)
# Create DQN agent
agent = DQNAgent(
state_size=window_size, # First dimension of observation space
action_size=env.action_space.n,
window_size=window_size,
num_features=num_features,
timeframes=timeframes,
learning_rate=0.001,
gamma=0.99,
epsilon=1.0,
epsilon_min=0.01,
epsilon_decay=0.995,
memory_size=10000,
batch_size=32,
target_update=10
)
# Training parameters
episodes = 1000
max_steps = 1000
best_reward = float('-inf')
best_model_path = 'NN/models/saved/best_rl_model.pth'
# Create models directory if it doesn't exist
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)
# Training loop
for episode in range(episodes):
state = env.reset()
total_reward = 0
for step in range(max_steps):
# Get action from agent
action = agent.act(state)
# Take action in environment
next_state, reward, done, info = env.step(action)
# Store experience in agent's memory
agent.remember(state, action, reward, next_state, done)
# Train agent
if len(agent.memory) > agent.batch_size:
loss = agent.replay()
if loss is not None:
logger.debug(f"Training loss: {loss:.4f}")
# Update state and reward
state = next_state
total_reward += reward
if done:
break
# Update epsilon
agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
# Log episode results
logger.info(f"Episode: {episode + 1}/{episodes}")
logger.info(f"Total Reward: {total_reward:.2f}")
logger.info(f"Final Balance: {info['balance']:.2f}")
logger.info(f"Max Drawdown: {info['max_drawdown']:.2%}")
logger.info(f"Win Rate: {info['win_rate']:.2%}")
logger.info(f"Epsilon: {agent.epsilon:.4f}")
# Save best model
if total_reward > best_reward:
best_reward = total_reward
agent.save(best_model_path)
logger.info(f"New best model saved with reward: {best_reward:.2f}")
# Save checkpoint every 100 episodes
if (episode + 1) % 100 == 0:
checkpoint_path = f'NN/models/saved/rl_model_episode_{episode + 1}.pth'
agent.save(checkpoint_path)
logger.info(f"Checkpoint saved at episode {episode + 1}")
if __name__ == "__main__":
train_rl()

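To exercise the pieces without live data, a smoke-test sketch that wires them together the same way train_rl() does; the synthetic DataFrame, dimensions, and episode length are placeholders, not a verified end-to-end run:

```python
import numpy as np
import pandas as pd
from NN.models.dqn_agent import DQNAgent
from NN.train_rl import RLTradingEnvironment

window_size, num_features, timeframes = 20, 5, ['1m', '5m', '15m']
feature_dim = num_features * len(timeframes)

# Synthetic frame mimicking the layout train_rl() builds: window_size repeats
# of the per-step feature columns plus a 'close' column (placeholder values).
n_rows = 400
cols = [f'feature_{i}' for i in range(feature_dim)] * window_size
df = pd.DataFrame(np.random.randn(n_rows, feature_dim * window_size), columns=cols)
df['close'] = 100 + np.cumsum(np.random.randn(n_rows))

env = RLTradingEnvironment(data=df, window_size=window_size,
                           num_features=num_features,
                           num_timeframes=len(timeframes),
                           initial_balance=10000, fee_rate=0.001, max_steps=200)
agent = DQNAgent(state_size=window_size, action_size=env.action_space.n,
                 window_size=window_size, num_features=num_features,
                 timeframes=timeframes, batch_size=32)

state, done = env.reset(), False
while not done:
    action = agent.act(state)
    next_state, reward, done, info = env.step(action)
    agent.remember(state, action, reward, next_state, done)
    agent.replay()
    state = next_state
print(f"balance={info['balance']:.2f} win_rate={info['win_rate']:.2%}")
```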
NN/utils/__init__.py

@@ -6,6 +6,8 @@ This package contains utility functions and classes used in the neural network t
- Data Interface: Connects to realtime trading data and processes it for the neural network models
"""
from NN.utils.data_interface import DataInterface
from .data_interface import DataInterface
from .trading_env import TradingEnvironment
from .signal_interpreter import SignalInterpreter
__all__ = ['DataInterface']
__all__ = ['DataInterface', 'TradingEnvironment', 'SignalInterpreter']

NN/utils/data_interface.py

@@ -13,6 +13,7 @@ import json
import pickle
from sklearn.preprocessing import MinMaxScaler
import sys
import pandas_ta as ta  # the ta.rsi/ta.macd/ta.bbands calls below use the pandas_ta API
# Add project root to sys.path
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -534,3 +535,77 @@ class DataInterface:
timestamp = df['timestamp'].iloc[-1]
return X, timestamp
def get_training_data(self, timeframe='1m', n_candles=5000):
"""
Get a consolidated dataframe for RL training with OHLCV and technical indicators
Args:
timeframe (str): Timeframe to use
n_candles (int): Number of candles to fetch
Returns:
DataFrame: Combined dataframe with price data and technical indicators
"""
# Get historical data
df = self.get_historical_data(timeframe=timeframe, n_candles=n_candles, use_cache=True)
if df is None or len(df) < 100: # Minimum required for indicators
logger.error(f"Not enough data for RL training (need at least 100 candles)")
return None
# Calculate technical indicators
try:
# Add RSI (14)
df['rsi'] = ta.rsi(df['close'], length=14)
# Add MACD
macd = ta.macd(df['close'])
df['macd'] = macd['MACD_12_26_9']
df['macd_signal'] = macd['MACDs_12_26_9']
df['macd_hist'] = macd['MACDh_12_26_9']
# Add Bollinger Bands
bbands = ta.bbands(df['close'], length=20)
df['bb_upper'] = bbands['BBU_20_2.0']
df['bb_middle'] = bbands['BBM_20_2.0']
df['bb_lower'] = bbands['BBL_20_2.0']
# Add ATR (Average True Range)
df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
# Add moving averages
df['sma_20'] = ta.sma(df['close'], length=20)
df['sma_50'] = ta.sma(df['close'], length=50)
df['ema_20'] = ta.ema(df['close'], length=20)
# Add OBV (On-Balance Volume)
df['obv'] = ta.obv(df['close'], df['volume'])
# Add momentum indicators
df['mom'] = ta.mom(df['close'], length=10)
# Normalize price to previous close
df['close_norm'] = df['close'] / df['close'].shift(1) - 1
df['high_norm'] = df['high'] / df['close'].shift(1) - 1
df['low_norm'] = df['low'] / df['close'].shift(1) - 1
# Volatility features
df['volatility'] = df['high'] / df['low'] - 1
# Volume features
df['volume_norm'] = df['volume'] / df['volume'].rolling(20).mean()
# Calculate returns
df['returns_1'] = df['close'].pct_change(1)
df['returns_5'] = df['close'].pct_change(5)
df['returns_10'] = df['close'].pct_change(10)
except Exception as e:
logger.error(f"Error calculating technical indicators: {str(e)}")
return None
# Drop NaN values
df = df.dropna()
return df

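A usage sketch for the new helper, assuming the repo's DataInterface has a working data source behind get_historical_data(); the column list in the comment is abridged:

```python
from NN.utils.data_interface import DataInterface

di = DataInterface(symbol="BTC/USDT", timeframes=['1m', '5m', '15m'])

# Returns None when fewer than ~100 candles are available or an indicator
# calculation fails; otherwise a dropna()-cleaned frame with OHLCV plus
# rsi, macd*, bb_*, atr, sma/ema, obv, mom, normalized prices and returns.
df = di.get_training_data(timeframe='1m', n_candles=5000)
if df is not None:
    print(df.shape, list(df.columns)[:10])
```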
NN/utils/trading_env.py (new file, 162 lines)

@@ -0,0 +1,162 @@
import numpy as np
import gym
from gym import spaces
from typing import Dict, Tuple, List
import pandas as pd
class TradingEnvironment(gym.Env):
"""
Custom trading environment for reinforcement learning
"""
def __init__(self,
data: pd.DataFrame,
initial_balance: float = 100.0,
fee_rate: float = 0.0002,
max_steps: int = 1000):
super(TradingEnvironment, self).__init__()
self.data = data
self.initial_balance = initial_balance
self.fee_rate = fee_rate
self.max_steps = max_steps
# Action space: 0 (SELL), 1 (HOLD), 2 (BUY)
self.action_space = spaces.Discrete(3)
# Observation space: price data, technical indicators, and account state
self.observation_space = spaces.Box(
low=-np.inf,
high=np.inf,
shape=(data.shape[1],), # Number of features
dtype=np.float32
)
# Initialize state
self.reset()
def reset(self) -> np.ndarray:
"""Reset the environment to initial state"""
self.current_step = 0
self.balance = self.initial_balance
self.position = 0 # 0: no position, 1: long position
self.entry_price = 0
self.total_trades = 0
self.winning_trades = 0
self.total_pnl = 0
self.balance_history = [self.initial_balance]
self.max_balance = self.initial_balance
return self._get_observation()
def _get_observation(self) -> np.ndarray:
"""Get current observation state"""
return self.data.iloc[self.current_step].values
def _calculate_reward(self, action: int) -> float:
"""Calculate reward based on action and outcome"""
current_price = self.data.iloc[self.current_step]['close']
# If we have an open position
if self.position != 0:
# Calculate PnL
pnl = self.position * (current_price - self.entry_price) / self.entry_price
fees = self.fee_rate * 2 # Entry and exit fees
# Close position
if (action == 0 and self.position > 0) or (action == 2 and self.position < 0):
net_pnl = pnl - fees
self.total_pnl += net_pnl
self.balance *= (1 + net_pnl)
self.balance_history.append(self.balance)
self.max_balance = max(self.max_balance, self.balance)
self.total_trades += 1
if net_pnl > 0:
self.winning_trades += 1
# Reward based on PnL
reward = net_pnl * 100 # Scale up for better learning
# Additional reward for win rate
win_rate = self.winning_trades / max(1, self.total_trades)
reward += win_rate * 0.1
self.position = 0
return reward
# Hold position
return pnl * 0.1 # Small reward for holding profitable positions
# No position
if action == 1: # HOLD
return 0
# Open new position
if action in [0, 2]: # SELL or BUY
self.position = -1 if action == 0 else 1
self.entry_price = current_price
return -self.fee_rate # Small penalty for trading
return 0
def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
"""Execute one step in the environment"""
# Calculate reward
reward = self._calculate_reward(action)
# Move to next step
self.current_step += 1
# Check if episode is done
done = self.current_step >= min(self.max_steps - 1, len(self.data) - 1)
# Get next observation
observation = self._get_observation()
# Calculate max drawdown
max_drawdown = 0
if len(self.balance_history) > 1:
peak = self.balance_history[0]
for balance in self.balance_history:
peak = max(peak, balance)
drawdown = (peak - balance) / peak
max_drawdown = max(max_drawdown, drawdown)
# Additional info
info = {
'balance': self.balance,
'position': self.position,
'total_trades': self.total_trades,
'win_rate': self.winning_trades / max(1, self.total_trades),
'total_pnl': self.total_pnl,
'max_drawdown': max_drawdown
}
return observation, reward, done, info
def render(self, mode='human'):
"""Render the environment"""
if mode == 'human':
print(f"Step: {self.current_step}")
print(f"Balance: ${self.balance:.2f}")
print(f"Position: {self.position}")
print(f"Total Trades: {self.total_trades}")
print(f"Win Rate: {self.winning_trades/max(1, self.total_trades):.2%}")
print(f"Total PnL: ${self.total_pnl:.2f}")
print(f"Max Drawdown: {self._calculate_max_drawdown():.2%}")
print("-" * 50)
def _calculate_max_drawdown(self):
"""Calculate maximum drawdown from balance history"""
if len(self.balance_history) <= 1:
return 0.0
peak = self.balance_history[0]
max_drawdown = 0.0
for balance in self.balance_history:
peak = max(peak, balance)
drawdown = (peak - balance) / peak
max_drawdown = max(max_drawdown, drawdown)
return max_drawdown
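
Finally, a random-policy rollout of the base environment on a toy price series, to illustrate the reset/step/render contract (a sketch; the data is synthetic):

```python
import numpy as np
import pandas as pd
from NN.utils.trading_env import TradingEnvironment

# The environment only needs a 'close' column for reward calculation; every
# column of the frame becomes part of the observation vector.
n = 400
df = pd.DataFrame({
    'close': 100 + np.cumsum(np.random.randn(n)),
    'feature_0': np.random.randn(n),
})

env = TradingEnvironment(data=df, initial_balance=100.0,
                         fee_rate=0.0002, max_steps=200)

obs, done = env.reset(), False
while not done:
    action = env.action_space.sample()  # 0 = SELL, 1 = HOLD, 2 = BUY
    obs, reward, done, info = env.step(action)

env.render()  # prints balance, position, trades, win rate, PnL, drawdown
```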