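"""Train a DQN trading agent on multi-timeframe BTC/USDT data.

Builds an RLTradingEnvironment that reshapes flat observations into
(window, features) matrices for a CNN-based DQN agent, then runs an
episodic training loop with epsilon-greedy exploration, experience
replay, and periodic checkpointing.
"""
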
import logging
import os
import sys

import numpy as np
import pandas as pd
import gym

# Add parent directory to path so the NN package can be imported
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from NN.utils.data_interface import DataInterface
from NN.utils.trading_env import TradingEnvironment
from NN.models.dqn_agent import DQNAgent

# Configure logging
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('rl_training.log'),
        logging.StreamHandler()
    ]
)


class RLTradingEnvironment(TradingEnvironment):
    """Extended trading environment that reshapes state for CNN"""

    def __init__(self, data, window_size, num_features, num_timeframes, **kwargs):
        # Set attributes before parent initialization
        self.window_size = window_size
        self.num_features = num_features
        self.num_timeframes = num_timeframes
        self.feature_dim = num_features * num_timeframes

        # Initialize parent class
        super().__init__(data=data, **kwargs)

        # Update observation space for CNN
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.window_size, self.feature_dim),
            dtype=np.float32
        )
    def _get_observation(self):
        """Get current observation reshaped for CNN"""
        # Get flattened observation from parent class
        flat_obs = super()._get_observation()

        # Extract features (exclude close price)
        features = flat_obs[:-1]

        # Keep only the most recent complete rows so the reshapes below
        # cannot fail on a partial trailing row
        n_windows = len(features) // self.feature_dim
        features = features[len(features) - n_windows * self.feature_dim:]

        if n_windows < self.window_size:
            # Pad with zeros at the front if not enough data
            padding = np.zeros((self.window_size - n_windows, self.feature_dim))
            reshaped = np.vstack([
                padding,
                features.reshape(n_windows, self.feature_dim)
            ])
        else:
            # Take the most recent window_size rows
            reshaped = features.reshape(n_windows, self.feature_dim)[-self.window_size:]

        return reshaped.astype(np.float32)
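
# For reference: with the defaults used below (window_size=20, three
# timeframes), _get_observation() yields a (20, 3 * num_features) float32
# matrix, i.e. one row per window step and one column per
# (timeframe, feature) pair.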


def train_rl():
    """Train the RL model using the DQN agent"""
    # Initialize data interface with BTC/USDT and multiple timeframes
    timeframes = ['1m', '5m', '15m']
    window_size = 20
    data_interface = DataInterface(symbol="BTC/USDT", timeframes=timeframes)

    # Get training data
    X_train, y_train, X_val, y_val, train_prices, val_prices = data_interface.prepare_training_data()
    if X_train is None:
        logger.error("Failed to get training data")
        return

    # Calculate feature dimensions
    num_features = X_train.shape[2]  # Number of features per timeframe
    total_features = num_features * len(timeframes)  # Total features across all timeframes

    # Flatten features for environment
    n_samples = X_train.shape[0]
    flattened_features = X_train.reshape(n_samples, window_size, -1)  # Reshape to (batch, window, features)

    # Build a DataFrame with a uniquely named column for each
    # (window step, feature) pair, plus the close price. This assumes the
    # parent environment treats every non-'close' column as a feature,
    # matching how _get_observation() slices the flat state.
    feature_dim = flattened_features.shape[2]
    feature_columns = [f'step{t}_feature_{i}'
                       for t in range(window_size)
                       for i in range(feature_dim)]
    df = pd.DataFrame(flattened_features.reshape(n_samples, -1), columns=feature_columns)
    df['close'] = train_prices

    # Create environment
    env = RLTradingEnvironment(
        data=df,
        window_size=window_size,
        num_features=num_features,
        num_timeframes=len(timeframes),
        initial_balance=10000,
        fee_rate=0.001,
        max_steps=1000
    )

    # Create DQN agent
    agent = DQNAgent(
        state_size=window_size,  # First dimension of observation space
        action_size=env.action_space.n,
        window_size=window_size,
        num_features=num_features,
        timeframes=timeframes,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,
        epsilon_min=0.01,
        epsilon_decay=0.995,
        memory_size=10000,
        batch_size=32,
        target_update=10
    )

    # Training parameters
    episodes = 1000
    max_steps = 1000
    best_reward = float('-inf')
    best_model_path = 'NN/models/saved/best_rl_model.pth'

    # Create models directory if it doesn't exist
    os.makedirs(os.path.dirname(best_model_path), exist_ok=True)

    # Training loop
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0

        for step in range(max_steps):
            # Get action from agent
            action = agent.act(state)

            # Take action in environment
            next_state, reward, done, info = env.step(action)

            # Store experience in agent's memory
            agent.remember(state, action, reward, next_state, done)

            # Train agent
            if len(agent.memory) > agent.batch_size:
                loss = agent.replay()
                if loss is not None:
                    logger.debug(f"Training loss: {loss:.4f}")

            # Update state and reward
            state = next_state
            total_reward += reward

            if done:
                break

        # Update epsilon
        agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)

        # Log episode results
        logger.info(f"Episode: {episode + 1}/{episodes}")
        logger.info(f"Total Reward: {total_reward:.2f}")
        logger.info(f"Final Balance: {info['balance']:.2f}")
        logger.info(f"Max Drawdown: {info['max_drawdown']:.2%}")
        logger.info(f"Win Rate: {info['win_rate']:.2%}")
        logger.info(f"Epsilon: {agent.epsilon:.4f}")

        # Save best model
        if total_reward > best_reward:
            best_reward = total_reward
            agent.save(best_model_path)
            logger.info(f"New best model saved with reward: {best_reward:.2f}")

        # Save checkpoint every 100 episodes
        if (episode + 1) % 100 == 0:
            checkpoint_path = f'NN/models/saved/rl_model_episode_{episode + 1}.pth'
            agent.save(checkpoint_path)
            logger.info(f"Checkpoint saved at episode {episode + 1}")


if __name__ == "__main__":
    train_rl()
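
# Usage sketch (assumes the NN package and local BTC/USDT market data are
# available; the script filename is illustrative):
#   python train_rl_script.py
# Progress is logged to rl_training.log and the console; the best model and
# periodic checkpoints are written under NN/models/saved/.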