RL training in NN working
This commit is contained in:
parent 4eac14022c
commit 0ad9484d56
@@ -13,10 +13,9 @@ logger = logging.getLogger(__name__)

class CNNModelPyTorch(nn.Module):
    """
    CNN model for trading signals
    Simplified version for RL training
    CNN model for trading with multiple timeframes
    """
    def __init__(self, window_size: int, num_features: int, output_size: int, timeframes: List[str]):
    def __init__(self, window_size, num_features, output_size, timeframes):
        super(CNNModelPyTorch, self).__init__()

        self.window_size = window_size
@@ -24,12 +23,33 @@ class CNNModelPyTorch(nn.Module):
        self.output_size = output_size
        self.timeframes = timeframes

        # Calculate total input features across all timeframes
        self.total_features = num_features * len(timeframes)

        # Device configuration
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

        # Build model
        self.build_model()
        # Convolutional layers
        self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)

        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)

        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

        # Calculate size after convolutions
        conv_output_size = window_size * 256

        # Fully connected layers
        self.fc1 = nn.Linear(conv_output_size, 512)
        self.fc2 = nn.Linear(512, 256)

        # Advantage and Value streams (Dueling DQN architecture)
        self.fc3 = nn.Linear(256, output_size)  # Advantage stream
        self.value_fc = nn.Linear(256, 1)  # Value stream

        # Initialize optimizer and scheduler
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
@@ -40,36 +60,6 @@ class CNNModelPyTorch(nn.Module):
        # Move model to device
        self.to(self.device)

    def build_model(self):
        """Build the CNN architecture"""
        # First Convolutional Layer
        self.conv1 = nn.Conv1d(
            in_channels=self.num_features * len(self.timeframes),
            out_channels=32,
            kernel_size=3,
            padding=1
        )
        self.bn1 = nn.BatchNorm1d(32)

        # Second Convolutional Layer
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(64)

        # Third Convolutional Layer
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(128)

        # Calculate size after convolutions
        conv_out_size = self.window_size * 128

        # Fully connected layers
        self.fc1 = nn.Linear(conv_out_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, self.output_size)

        # Additional output for value estimation
        self.value_fc = nn.Linear(256, 1)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Forward pass through the network"""
        # Ensure input is on the correct device
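The hunk is truncated before the body of forward(). A minimal sketch of a dueling-DQN forward pass that is consistent with the layers defined above; the input layout (batch, window_size, total_features) and the Q = V + (A - mean(A)) combination are assumptions, not the committed code:

import torch
import torch.nn.functional as F

def forward_sketch(model, x):
    # x: (batch, window_size, total_features) -> (batch, channels, window_size)
    x = x.to(model.device).permute(0, 2, 1)
    x = F.relu(model.bn1(model.conv1(x)))
    x = F.relu(model.bn2(model.conv2(x)))
    x = F.relu(model.bn3(model.conv3(x)))        # (batch, 256, window_size)
    x = x.flatten(1)                             # (batch, window_size * 256) == conv_output_size
    x = F.relu(model.fc1(x))
    x = F.relu(model.fc2(x))
    advantage = model.fc3(x)                     # (batch, output_size)
    value = model.value_fc(x)                    # (batch, 1)
    # Standard dueling combination: Q = V + (A - mean(A))
    q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
    return q_values, value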
422  NN/train_rl.py
@@ -8,6 +8,7 @@ import os
import sys
import pandas as pd
import gym
import json

# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -28,19 +29,31 @@ logging.basicConfig(
    ]
)

class RLTradingEnvironment(TradingEnvironment):
    """Extended trading environment that reshapes state for CNN"""
    def __init__(self, data, window_size, num_features, num_timeframes, **kwargs):
        # Set attributes before parent initialization
class RLTradingEnvironment(gym.Env):
    """
    Reinforcement Learning environment for trading with technical indicators
    from multiple timeframes
    """
    def __init__(self, features_1m, features_5m, features_15m, window_size=20, trading_fee=0.001):
        super().__init__()

        # Initialize attributes before parent class
        self.window_size = window_size
        self.num_features = num_features
        self.num_timeframes = num_timeframes
        self.feature_dim = num_features * num_timeframes
        self.num_features = features_1m.shape[1] - 1  # Exclude close price
        self.num_timeframes = 3  # 1m, 5m, 15m
        self.feature_dim = self.num_features * self.num_timeframes

        # Initialize parent class
        super().__init__(data=data, **kwargs)
        # Store features from different timeframes
        self.features_1m = features_1m
        self.features_5m = features_5m
        self.features_15m = features_15m

        # Update observation space for CNN
        # Trading parameters
        self.initial_balance = 1.0
        self.trading_fee = trading_fee

        # Define action and observation spaces
        self.action_space = gym.spaces.Discrete(3)  # 0: Buy, 1: Sell, 2: Hold
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
@@ -48,145 +61,354 @@ class RLTradingEnvironment(TradingEnvironment):
            dtype=np.float32
        )

        # State variables
        self.reset()

    def reset(self):
        """Reset the environment to initial state"""
        self.balance = self.initial_balance
        self.position = 0.0  # Amount of asset held
        self.current_step = self.window_size
        self.trades = 0
        self.wins = 0
        self.losses = 0
        self.trade_history = []

        # Get initial observation
        observation = self._get_observation()
        return observation

    def _get_observation(self):
        """Get current observation reshaped for CNN"""
        # Get flattened observation from parent class
        flat_obs = super()._get_observation()
        """
        Get the current state observation.
        Combine features from multiple timeframes, reshaped for the CNN.
        """
        # Calculate indices for each timeframe
        idx_1m = self.current_step
        idx_5m = idx_1m // 5
        idx_15m = idx_1m // 15

        # Extract features (exclude close price)
        features = flat_obs[:-1]  # Remove close price
        # Extract feature windows from each timeframe
        window_1m = self.features_1m[idx_1m - self.window_size:idx_1m]

        # Calculate number of complete windows
        n_windows = len(features) // self.feature_dim
        if n_windows < self.window_size:
            # Pad with zeros if not enough data
            padding = np.zeros((self.window_size - n_windows, self.feature_dim))
            reshaped = np.vstack([
                padding,
                features[-(n_windows * self.feature_dim):].reshape(n_windows, self.feature_dim)
        # Handle 5m timeframe
        start_5m = max(0, idx_5m - self.window_size)
        window_5m = self.features_5m[start_5m:idx_5m]

        # Handle 15m timeframe
        start_15m = max(0, idx_15m - self.window_size)
        window_15m = self.features_15m[start_15m:idx_15m]

        # Pad if needed (for 5m and 15m)
        if len(window_5m) < self.window_size:
            padding = np.zeros((self.window_size - len(window_5m), window_5m.shape[1]))
            window_5m = np.vstack([padding, window_5m])

        if len(window_15m) < self.window_size:
            padding = np.zeros((self.window_size - len(window_15m), window_15m.shape[1]))
            window_15m = np.vstack([padding, window_15m])

        # Combine features from all timeframes
        combined_features = np.hstack([
            window_1m.reshape(self.window_size, -1),
            window_5m.reshape(self.window_size, -1),
            window_15m.reshape(self.window_size, -1)
        ])

        # Convert to float32 and handle any NaN values
        combined_features = np.nan_to_num(combined_features, nan=0.0).astype(np.float32)

        return combined_features
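The combined observation stacks the three per-timeframe windows side by side, aligning the slower timeframes to the current 1m step by integer division. A rough sketch of the resulting shape; the 10-features-per-timeframe figure and the random arrays are assumptions, not values from the repo:

import numpy as np

def tf_window(features, idx, window_size):
    # Mirror of the padding logic above: take the trailing window, zero-pad early steps
    start = max(0, idx - window_size)
    w = features[start:idx]
    if len(w) < window_size:
        w = np.vstack([np.zeros((window_size - len(w), w.shape[1])), w])
    return w

window_size, n_feat = 20, 10
f1 = np.random.rand(5000, n_feat)
f5 = np.random.rand(1000, n_feat)
f15 = np.random.rand(340, n_feat)

t = 100                                     # current 1m step
obs = np.hstack([
    tf_window(f1, t, window_size),
    tf_window(f5, t // 5, window_size),     # 5m rows aligned to the same point in time
    tf_window(f15, t // 15, window_size),   # 15m rows aligned to the same point in time
])
print(obs.shape)                            # (20, 30) -> (window_size, n_feat * 3)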
    def step(self, action):
        """
        Take an action in the environment and return the next state, reward, done flag, and info

        Args:
            action (int): 0 = Buy, 1 = Sell, 2 = Hold

        Returns:
            tuple: (observation, reward, done, info)
        """
        # Get current and next price
        current_price = self.features_1m[self.current_step, -1]  # Close price is last column
        next_price = self.features_1m[self.current_step + 1, -1]

        # Handle zero or negative prices
        if current_price <= 0:
            current_price = 1e-8  # Small positive number
        if next_price <= 0:
            next_price = current_price  # Use current price if next price is invalid

        price_change = (next_price - current_price) / current_price

        # Default reward is slightly negative to discourage inaction
        reward = -0.0001
        done = False

        # Execute action
        if action == 0:  # BUY
            if self.position == 0:  # Only buy if not already in position
                self.position = self.balance * (1 - self.trading_fee)
                self.balance = 0
                self.trades += 1
                reward = 0  # Neutral reward for entering position
                self.trade_entry_price = current_price

        elif action == 1:  # SELL
            if self.position > 0:  # Only sell if in position
                # Calculate position value at current price
                position_value = self.position * (1 + price_change)
                self.balance = position_value * (1 - self.trading_fee)

                # Calculate profit/loss from trade
                profit_pct = (next_price - self.trade_entry_price) / self.trade_entry_price
                reward = profit_pct * 10  # Scale reward by profit percentage

                # Update win/loss count
                if profit_pct > 0:
                    self.wins += 1
                else:
            # Take the most recent window_size windows
            start_idx = (n_windows - self.window_size) * self.feature_dim
            reshaped = features[start_idx:].reshape(self.window_size, self.feature_dim)
                    self.losses += 1

        return reshaped.astype(np.float32)
                # Record trade
                self.trade_history.append({
                    'entry_price': self.trade_entry_price,
                    'exit_price': next_price,
                    'profit_pct': profit_pct
                })

def train_rl():
                # Reset position
                self.position = 0

        # else: (action == 2 - HOLD) - no position change

        # Move to next step
        self.current_step += 1

        # Check if done
        if self.current_step >= len(self.features_1m) - 1:
            done = True

            # Apply final evaluation
            if self.position > 0:
                # Force close position at the end
                position_value = self.position * (1 + price_change)
                self.balance = position_value * (1 - self.trading_fee)
                profit_pct = (next_price - self.trade_entry_price) / self.trade_entry_price
                reward += profit_pct * 10

                # Update win/loss count
                if profit_pct > 0:
                    self.wins += 1
                else:
                    self.losses += 1

        # Get the next observation
        observation = self._get_observation()

        # Calculate metrics for info
        total_value = self.balance + self.position * next_price
        gain = (total_value - self.initial_balance) / self.initial_balance
        self.win_rate = self.wins / max(1, self.trades)

        info = {
            'balance': self.balance,
            'position': self.position,
            'total_value': total_value,
            'gain': gain,
            'trades': self.trades,
            'win_rate': self.win_rate
        }

        return observation, reward, done, info
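A worked example of the reward bookkeeping above, collapsing a buy and the following sell into adjacent steps for brevity (all numbers are illustrative):

balance, fee = 1.0, 0.001

# BUY at 100.0: the whole balance moves into the position, minus the fee
entry_price = 100.0
position, balance = balance * (1 - fee), 0.0               # position = 0.999

# SELL one step later, when the next close is 102.0
next_price = 102.0
price_change = (next_price - entry_price) / entry_price    # 0.02
balance = position * (1 + price_change) * (1 - fee)        # ~1.0180
profit_pct = (next_price - entry_price) / entry_price      # 0.02
reward = profit_pct * 10                                   # 0.2
print(round(balance, 4), reward)                           # 1.018 0.2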
def train_rl(env_class=None, num_episodes=5000, max_steps=2000, save_path="NN/models/saved/dqn_agent"):
    """
    Train the RL model using the DQN agent
    Train DQN agent for RL-based trading with extended training and monitoring
    """
    # Initialize data interface with BTC/USDT and multiple timeframes
    timeframes = ['1m', '5m', '15m']
    logger.info("Starting extended RL training for trading...")

    # Environment setup
    window_size = 20
    data_interface = DataInterface(symbol="BTC/USDT", timeframes=timeframes)
    timeframes = ["1m", "5m", "15m"]
    trading_fee = 0.001

    # Get training data
    X_train, y_train, X_val, y_val, train_prices, val_prices = data_interface.prepare_training_data()
    if X_train is None:
        logger.error("Failed to get training data")
        return
    # Ensure save directory exists
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # Calculate feature dimensions
    num_features = X_train.shape[2]  # Number of features per timeframe
    total_features = num_features * len(timeframes)  # Total features across all timeframes
    # Setup TensorBoard for monitoring
    writer = SummaryWriter(f'runs/rl_training_{datetime.now().strftime("%Y%m%d_%H%M%S")}')

    # Flatten features for environment
    n_samples = X_train.shape[0]
    flattened_features = X_train.reshape(n_samples, window_size, -1)  # Reshape to (batch, window, features)
    # Data loading
    data_interface = DataInterface(
        symbol="BTC/USDT",
        timeframes=timeframes
    )

    # Create DataFrame with features as separate columns
    feature_columns = [f'feature_{i}' for i in range(flattened_features.shape[2])]
    df = pd.DataFrame(flattened_features.reshape(n_samples, -1), columns=feature_columns * window_size)
    df['close'] = train_prices
    # Get training data for each timeframe with more data
    features_1m = data_interface.get_training_data("1m", n_candles=5000)
    features_5m = data_interface.get_training_data("5m", n_candles=2500)
    features_15m = data_interface.get_training_data("15m", n_candles=2500)

    if features_1m is None or features_5m is None or features_15m is None:
        logger.error("Failed to load training data")
        return None

    # Convert DataFrames to numpy arrays, excluding timestamp column
    features_1m = features_1m.drop('timestamp', axis=1, errors='ignore').values
    features_5m = features_5m.drop('timestamp', axis=1, errors='ignore').values
    features_15m = features_15m.drop('timestamp', axis=1, errors='ignore').values

    # Calculate number of features per timeframe
    num_features = features_1m.shape[1]  # Number of features after dropping timestamp

    # Create environment
    env = RLTradingEnvironment(
        data=df,
        features_1m=features_1m,
        features_5m=features_5m,
        features_15m=features_15m,
        window_size=window_size,
        num_features=num_features,
        num_timeframes=len(timeframes),
        initial_balance=10000,
        fee_rate=0.001,
        max_steps=1000
        trading_fee=trading_fee
    )

    # Create DQN agent
    # Create agent with adjusted parameters for longer training
    state_size = window_size
    action_size = 3
    agent = DQNAgent(
        state_size=window_size,  # First dimension of observation space
        action_size=env.action_space.n,
        state_size=state_size,
        action_size=action_size,
        window_size=window_size,
        num_features=num_features,
        timeframes=timeframes,
        learning_rate=0.001,
        gamma=0.99,
        learning_rate=0.0005,  # Reduced learning rate for stability
        gamma=0.99,  # Increased discount factor
        epsilon=1.0,
        epsilon_min=0.01,
        epsilon_decay=0.995,
        memory_size=10000,
        batch_size=32,
        target_update=10
        epsilon_decay=0.999,  # Slower epsilon decay
        memory_size=50000,  # Increased memory size
        batch_size=128  # Increased batch size
    )
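With epsilon_decay=0.999 applied once per episode (as in the training loop below), exploration falls off slowly: roughly 0.37 after 1,000 episodes, only hitting the epsilon_min floor of 0.01 around episode 4,600. A quick check of that schedule (illustrative, not part of the commit):

eps, eps_min, decay = 1.0, 0.01, 0.999
floor_episode = None
for episode in range(1, 5001):
    eps = max(eps_min, eps * decay)
    if floor_episode is None and eps == eps_min:
        floor_episode = episode
print(round(0.999 ** 1000, 4), floor_episode)   # ~0.3677 after 1000 episodes, floor reached near episode 4603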
    # Training parameters
    episodes = 1000
    max_steps = 1000
    # Variables to track best performance
    best_reward = float('-inf')
    best_model_path = 'NN/models/saved/best_rl_model.pth'
    best_episode = 0
    best_pnl = float('-inf')
    best_win_rate = 0.0

    # Create models directory if it doesn't exist
    os.makedirs(os.path.dirname(best_model_path), exist_ok=True)
    # Training metrics
    episode_rewards = []
    episode_pnls = []
    episode_win_rates = []
    episode_trades = []

    # Check if previous best model exists and load it
    best_model_path = f"{save_path}_best"
    if os.path.exists(f"{best_model_path}_policy.pt"):
        try:
            logger.info(f"Loading previous best model from {best_model_path}")
            agent.load(best_model_path)
            metadata_path = f"{best_model_path}_metadata.json"
            if os.path.exists(metadata_path):
                with open(metadata_path, 'r') as f:
                    metadata = json.load(f)
                best_reward = metadata.get('best_reward', best_reward)
                best_episode = metadata.get('best_episode', best_episode)
                best_pnl = metadata.get('best_pnl', best_pnl)
                best_win_rate = metadata.get('best_win_rate', best_win_rate)
                logger.info(f"Loaded previous best metrics - Reward: {best_reward:.4f}, PnL: {best_pnl:.4f}, Win Rate: {best_win_rate:.4f}")
        except Exception as e:
            logger.error(f"Error loading previous best model: {e}")

    # Training loop
    for episode in range(episodes):
    try:
        for episode in range(1, num_episodes + 1):
            state = env.reset()
            total_reward = 0
            done = False
            steps = 0

        for step in range(max_steps):
            # Get action from agent
            while not done and steps < max_steps:
                action = agent.act(state)

                # Take action in environment
                next_state, reward, done, info = env.step(action)

                # Store experience in agent's memory
                agent.remember(state, action, reward, next_state, done)

                # Train agent
                if len(agent.memory) > agent.batch_size:
                    # Learn from experience
                    loss = agent.replay()
                    if loss is not None:
                        logger.debug(f"Training loss: {loss:.4f}")

                # Update state and reward
                state = next_state
                total_reward += reward
                steps += 1

            if done:
                break
            # Calculate episode metrics
            episode_rewards.append(total_reward)
            episode_pnls.append(info['gain'])
            episode_win_rates.append(info['win_rate'])
            episode_trades.append(info['trades'])

            # Update epsilon
            agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
            # Log to TensorBoard
            writer.add_scalar('Reward/episode', total_reward, episode)
            writer.add_scalar('PnL/episode', info['gain'], episode)
            writer.add_scalar('WinRate/episode', info['win_rate'], episode)
            writer.add_scalar('Trades/episode', info['trades'], episode)
            writer.add_scalar('Epsilon/episode', agent.epsilon, episode)

        # Log episode results
        logger.info(f"Episode: {episode + 1}/{episodes}")
        logger.info(f"Total Reward: {total_reward:.2f}")
        logger.info(f"Final Balance: {info['balance']:.2f}")
        logger.info(f"Max Drawdown: {info['max_drawdown']:.2%}")
        logger.info(f"Win Rate: {info['win_rate']:.2%}")
        logger.info(f"Epsilon: {agent.epsilon:.4f}")

        # Save best model
        if total_reward > best_reward:
            # Save the best model based on multiple metrics (only every 50 episodes)
            is_better = False
            if episode % 50 == 0:  # Only check for saving every 50 episodes
                if (info['gain'] > best_pnl and info['win_rate'] > 0.5) or \
                   (info['gain'] > best_pnl * 1.1) or \
                   (info['win_rate'] > best_win_rate * 1.1):
                    best_reward = total_reward
                    best_episode = episode
                    best_pnl = info['gain']
                    best_win_rate = info['win_rate']
                    agent.save(best_model_path)
                    logger.info(f"New best model saved with reward: {best_reward:.2f}")
                    is_better = True

        # Save checkpoint every 100 episodes
        if (episode + 1) % 100 == 0:
            checkpoint_path = f'NN/models/saved/rl_model_episode_{episode + 1}.pth'
            agent.save(checkpoint_path)
            logger.info(f"Checkpoint saved at episode {episode + 1}")
                    # Save metadata about the best model
                    metadata = {
                        'best_reward': best_reward,
                        'best_episode': best_episode,
                        'best_pnl': best_pnl,
                        'best_win_rate': best_win_rate,
                        'date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    }
                    with open(f"{best_model_path}_metadata.json", 'w') as f:
                        json.dump(metadata, f)

            # Log training progress
            if episode % 10 == 0:
                avg_reward = sum(episode_rewards[-10:]) / 10
                avg_pnl = sum(episode_pnls[-10:]) / 10
                avg_win_rate = sum(episode_win_rates[-10:]) / 10
                avg_trades = sum(episode_trades[-10:]) / 10

                status = "NEW BEST!" if is_better else ""
                logger.info(f"Episode {episode}/{num_episodes} {status}")
                logger.info(f"Metrics (last 10 episodes):")
                logger.info(f"  Reward: {avg_reward:.4f}")
                logger.info(f"  PnL: {avg_pnl:.4f}")
                logger.info(f"  Win Rate: {avg_win_rate:.4f}")
                logger.info(f"  Trades: {avg_trades:.2f}")
                logger.info(f"  Epsilon: {agent.epsilon:.4f}")
                logger.info(f"Best so far - PnL: {best_pnl:.4f}, Win Rate: {best_win_rate:.4f}")

    except KeyboardInterrupt:
        logger.info("Training interrupted by user. Saving best model...")

    # Close TensorBoard writer
    writer.close()

    # Final logs
    logger.info(f"Training completed. Best model from episode {best_episode}")
    logger.info(f"Best metrics:")
    logger.info(f"  Reward: {best_reward:.4f}")
    logger.info(f"  PnL: {best_pnl:.4f}")
    logger.info(f"  Win Rate: {best_win_rate:.4f}")

    # Return the agent for potential further use
    return agent

if __name__ == "__main__":
    train_rl()
@@ -303,6 +303,76 @@ class DataInterface:
        """
        return len(self.timeframes) * 5  # OHLCV features for each timeframe

    def get_features(self, timeframe, n_candles=1000):
        """
        Get feature data with technical indicators for a specific timeframe.

        Args:
            timeframe (str): Timeframe to get features for ('1m', '5m', etc.)
            n_candles (int): Number of candles to get

        Returns:
            np.ndarray: Array of feature data including technical indicators
                and the close price as the last column
        """
        # Get historical data
        df = self.get_historical_data(timeframe=timeframe, n_candles=n_candles)

        if df is None or df.empty:
            logger.error(f"No data available for {self.symbol} {timeframe}")
            return None

        # Add technical indicators
        df = self.add_technical_indicators(df)

        # Drop NaN values that might have been introduced by indicators
        df = df.dropna()

        # Extract features (all columns except timestamp)
        features = df.drop('timestamp', axis=1).values

        logger.info(f"Prepared {len(features)} {timeframe} feature rows with {features.shape[1]} features")
        return features
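A minimal usage sketch for the new helper (the symbol, timeframes, and candle count are illustrative, not taken from the repo):

di = DataInterface(symbol="BTC/USDT", timeframes=["1m", "5m", "15m"])

features_1m = di.get_features("1m", n_candles=5000)   # np.ndarray or None
if features_1m is not None:
    # rows = candles remaining after dropna(); columns = OHLCV plus the indicators
    # added below, with the close price kept for the environment's price lookups
    print(features_1m.shape)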
    def add_technical_indicators(self, df):
        """
        Add technical indicators to the dataframe.

        Args:
            df (pd.DataFrame): DataFrame with OHLCV data

        Returns:
            pd.DataFrame: DataFrame with added technical indicators
        """
        # Make a copy to avoid modifying the original
        df_copy = df.copy()

        # Basic price indicators
        df_copy['returns'] = df_copy['close'].pct_change()
        df_copy['log_returns'] = np.log(df_copy['close'] / df_copy['close'].shift(1))

        # Moving Averages
        df_copy['sma_7'] = ta.trend.sma_indicator(df_copy['close'], window=7)
        df_copy['sma_25'] = ta.trend.sma_indicator(df_copy['close'], window=25)
        df_copy['sma_99'] = ta.trend.sma_indicator(df_copy['close'], window=99)

        # MACD
        macd = ta.trend.MACD(df_copy['close'])
        df_copy['macd'] = macd.macd()
        df_copy['macd_signal'] = macd.macd_signal()
        df_copy['macd_diff'] = macd.macd_diff()

        # RSI
        df_copy['rsi'] = ta.momentum.rsi(df_copy['close'], window=14)

        # Bollinger Bands
        bollinger = ta.volatility.BollingerBands(df_copy['close'])
        df_copy['bb_high'] = bollinger.bollinger_hband()
        df_copy['bb_low'] = bollinger.bollinger_lband()
        df_copy['bb_pct'] = bollinger.bollinger_pband()

        return df_copy
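Most of these indicators need a warm-up window before they produce values (the 99-bar SMA is the longest here), which is why get_features() drops NaN rows afterwards. A small illustration with synthetic data (not from the repo):

import numpy as np
import pandas as pd
import ta

close = pd.Series(np.linspace(100.0, 110.0, 300))
sma_99 = ta.trend.sma_indicator(close, window=99)
print(sma_99.isna().sum())   # leading NaNs from the 99-bar warm-up; dropna() removes those rows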
    def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
        """
        Robust PnL calculator that handles:
@@ -557,33 +627,33 @@ class DataInterface:
        # Calculate technical indicators
        try:
            # Add RSI (14)
            df['rsi'] = ta.rsi(df['close'], length=14)
            df['rsi'] = ta.momentum.rsi(df['close'], window=14)

            # Add MACD
            macd = ta.macd(df['close'])
            df['macd'] = macd['MACD_12_26_9']
            df['macd_signal'] = macd['MACDs_12_26_9']
            df['macd_hist'] = macd['MACDh_12_26_9']
            macd = ta.trend.MACD(df['close'])
            df['macd'] = macd.macd()
            df['macd_signal'] = macd.macd_signal()
            df['macd_hist'] = macd.macd_diff()

            # Add Bollinger Bands
            bbands = ta.bbands(df['close'], length=20)
            df['bb_upper'] = bbands['BBU_20_2.0']
            df['bb_middle'] = bbands['BBM_20_2.0']
            df['bb_lower'] = bbands['BBL_20_2.0']
            bbands = ta.volatility.BollingerBands(df['close'])
            df['bb_upper'] = bbands.bollinger_hband()
            df['bb_middle'] = bbands.bollinger_mavg()
            df['bb_lower'] = bbands.bollinger_lband()

            # Add ATR (Average True Range)
            df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
            df['atr'] = ta.volatility.average_true_range(df['high'], df['low'], df['close'], window=14)

            # Add moving averages
            df['sma_20'] = ta.sma(df['close'], length=20)
            df['sma_50'] = ta.sma(df['close'], length=50)
            df['ema_20'] = ta.ema(df['close'], length=20)
            df['sma_20'] = ta.trend.sma_indicator(df['close'], window=20)
            df['sma_50'] = ta.trend.sma_indicator(df['close'], window=50)
            df['ema_20'] = ta.trend.ema_indicator(df['close'], window=20)

            # Add OBV (On-Balance Volume)
            df['obv'] = ta.obv(df['close'], df['volume'])
            df['obv'] = ta.volume.on_balance_volume(df['close'], df['volume'])

            # Add momentum indicators
            df['mom'] = ta.mom(df['close'], length=10)
            df['mom'] = ta.momentum.roc(df['close'], window=10)

            # Normalize price to previous close
            df['close_norm'] = df['close'] / df['close'].shift(1) - 1
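The replaced indicator calls follow the pandas_ta flat API (ta.rsi(..., length=14), ta.macd(...)), while the new ones use the ta package's module layout (ta.momentum.rsi(..., window=14)), so the module imported as ta presumably changes with this commit. Note also that the mom column is now computed with rate-of-change, since ta has no plain momentum indicator. A tiny sanity check of the new-style call (synthetic data, illustrative only):

import pandas as pd
import ta

close = pd.Series(range(1, 101), dtype=float)
rsi = ta.momentum.rsi(close, window=14)
print(rsi.iloc[-1])   # a strictly rising series pushes RSI to 100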