Commit c0872248ab by Dobromir Popov, 2025-05-13 17:19:52 +03:00 (parent 7dda00b64a)
60 changed files with 42085 additions and 6885 deletions


@@ -78,17 +78,25 @@ class CNNPyTorch(nn.Module):
window_size, num_features = input_shape
self.window_size = window_size
-# Simpler architecture with fewer layers and dropout
+# Increased capacity: wider conv filters plus a third conv layer
self.conv1 = nn.Sequential(
-nn.Conv1d(num_features, 32, kernel_size=3, padding=1),
-nn.BatchNorm1d(32),
+nn.Conv1d(num_features, 64, kernel_size=3, padding=1),  # Increased filters
+nn.BatchNorm1d(64),
nn.ReLU(),
nn.Dropout(0.2)
)
self.conv2 = nn.Sequential(
-nn.Conv1d(32, 64, kernel_size=3, padding=1),
-nn.BatchNorm1d(64),
+nn.Conv1d(64, 128, kernel_size=3, padding=1),  # Increased filters
+nn.BatchNorm1d(128),
nn.ReLU(),
nn.Dropout(0.2)
)
# Added third conv layer
self.conv3 = nn.Sequential(
nn.Conv1d(128, 128, kernel_size=3, padding=1),
nn.BatchNorm1d(128),
nn.ReLU(),
nn.Dropout(0.2)
)
@@ -96,12 +104,12 @@ class CNNPyTorch(nn.Module):
# Global average pooling to handle variable length sequences
self.global_pool = nn.AdaptiveAvgPool1d(1)
-# Fully connected layers
+# Fully connected layers (updated input size and hidden size)
self.fc = nn.Sequential(
-nn.Linear(64, 32),
+nn.Linear(128, 64),  # Updated input size from conv3, increased hidden size
nn.ReLU(),
nn.Dropout(0.2),
-nn.Linear(32, output_size)
+nn.Linear(64, output_size)
)
def forward(self, x):
@@ -120,10 +128,11 @@ class CNNPyTorch(nn.Module):
# Convolutional layers
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x) # Added conv3 pass
# Global pooling
x = self.global_pool(x)
-x = x.squeeze(-1)
+x = x.squeeze(-1)  # Shape becomes [batch, 128]
# Fully connected layers
action_logits = self.fc(x)
@@ -216,6 +225,8 @@ class CNNModelPyTorch:
self.last_actions = [[] for _ in range(num_pairs)] # Track recent actions per pair
def train_epoch(self, X_train, y_train, future_prices, batch_size):
    """Train the model for one epoch with focus on short-term pattern recognition"""
    # Run extrema prediction up front so its outputs are available during training
    # (the docstring must stay first, or the string literal is no longer a docstring)
    self.predict_extrema(X_train)
self.model.train()
total_loss = 0
@@ -321,7 +332,8 @@ class CNNModelPyTorch:
return avg_loss, 0, accuracy # Return 0 for price_loss as we're not using it
-def predict(self, X):
+def predict_extrema(self, X):
+# Predict local extrema (lows and highs) based on input data
"""Make predictions optimized for short-term high-leverage trading signals"""
self.model.eval()
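For a quick sanity check of the widened stack, here is a standalone sketch (not part of the commit; the batch size, window size, and feature count are arbitrary):

import torch
import torch.nn as nn

# Mirror of the new conv1 -> conv2 -> conv3 -> global-pool path
conv_stack = nn.Sequential(
    nn.Conv1d(5, 64, kernel_size=3, padding=1), nn.BatchNorm1d(64), nn.ReLU(),
    nn.Conv1d(64, 128, kernel_size=3, padding=1), nn.BatchNorm1d(128), nn.ReLU(),
    nn.Conv1d(128, 128, kernel_size=3, padding=1), nn.BatchNorm1d(128), nn.ReLU(),
    nn.AdaptiveAvgPool1d(1),
)
x = torch.randn(8, 5, 60)            # [batch, num_features=5, window_size=60], channels-first
pooled = conv_stack(x).squeeze(-1)   # -> [8, 128], matching the comment in forward()
print(pooled.shape)                  # torch.Size([8, 128])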


@@ -54,6 +54,7 @@ class DQNAgent:
self.epsilon = epsilon
self.epsilon_min = epsilon_min
self.epsilon_decay = epsilon_decay
self.epsilon_start = epsilon # Store initial epsilon value for resets/bumps
self.buffer_size = buffer_size
self.batch_size = batch_size
self.target_update = target_update
@@ -127,6 +128,28 @@ class DQNAgent:
self.best_reward = -float('inf')
self.no_improvement_count = 0
# Confidence tracking
self.confidence_history = []
self.avg_confidence = 0.0
self.max_confidence = 0.0
self.min_confidence = 1.0
# Trade action fee and confidence thresholds
self.trade_action_fee = 0.0005 # Small fee to discourage unnecessary trading
self.minimum_action_confidence = 0.5 # Minimum confidence to consider trading
self.recent_actions = [] # Track recent actions to avoid oscillations
# Violent move detection
self.price_history = []
self.volatility_window = 20 # Window size for volatility calculation
self.volatility_threshold = 0.0015 # Threshold for considering a move "violent"
self.post_violent_move = False # Flag for recent violent move
self.violent_move_cooldown = 0 # Cooldown after violent move
# Feature integration
self.last_hidden_features = None # Store last extracted features
self.feature_history = [] # Store history of features for analysis
# Check if mixed precision training should be used
self.use_mixed_precision = False
if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
@@ -146,6 +169,7 @@ class DQNAgent:
self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0]] # Default timeframes
logger.info(f"DQN Agent using device: {self.device}")
logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
def move_models_to_device(self, device=None):
"""Move models to the specified device (GPU/CPU)"""
@@ -189,8 +213,20 @@ class DQNAgent:
current_price = state[-1] # Last feature
next_price = next_state[-1]
-# Calculate price change
-price_change = (next_price - current_price) / current_price
+# Calculate price change - avoid division by zero
+if np.isscalar(current_price) and current_price != 0:
+    price_change = (next_price - current_price) / current_price
+elif isinstance(current_price, np.ndarray):
+    # Handle array case - protect against division by zero
+    with np.errstate(divide='ignore', invalid='ignore'):
+        price_change = (next_price - current_price) / current_price
+    # Replace infinities and NaNs with zeros
+    if isinstance(price_change, np.ndarray):
+        price_change = np.nan_to_num(price_change, nan=0.0, posinf=0.0, neginf=0.0)
+    else:
+        price_change = 0.0 if np.isnan(price_change) or np.isinf(price_change) else price_change
+else:
+    price_change = 0.0
# Check if this is a significant price movement
if abs(price_change) > 0.002: # Significant price change
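As a standalone check, the guarded division above behaves like this on a toy array:

import numpy as np

cur = np.array([100.0, 0.0, 50.0])
nxt = np.array([101.0, 1.0, 49.0])
with np.errstate(divide='ignore', invalid='ignore'):
    change = (nxt - cur) / cur               # middle entry divides by zero -> inf
change = np.nan_to_num(change, nan=0.0, posinf=0.0, neginf=0.0)
print(change)                                # [ 0.01  0.   -0.02]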
@@ -264,9 +300,17 @@ class DQNAgent:
# Get predictions using the policy network
self.policy_net.eval() # Set to evaluation mode for inference
-action_probs, extrema_pred, price_predictions = self.policy_net(state_tensor)
+action_probs, extrema_pred, price_predictions, hidden_features = self.policy_net(state_tensor)
self.policy_net.train() # Back to training mode
# Store hidden features for integration
self.last_hidden_features = hidden_features.cpu().numpy()
# Track feature history (limited size)
self.feature_history.append(hidden_features.cpu().numpy())
if len(self.feature_history) > 100:
self.feature_history = self.feature_history[-100:]
# Get the predicted extrema class (0=bottom, 1=top, 2=neither)
extrema_class = extrema_pred.argmax(dim=1).item()
extrema_confidence = torch.softmax(extrema_pred, dim=1)[0, extrema_class].item()
@@ -336,17 +380,120 @@ class DQNAgent:
# Get the action with highest Q-value
action = action_probs.argmax().item()
# Calculate overall confidence in the action
q_values_softmax = F.softmax(action_probs, dim=1)[0]
action_confidence = q_values_softmax[action].item()
# Track confidence metrics
self.confidence_history.append(action_confidence)
if len(self.confidence_history) > 100:
self.confidence_history = self.confidence_history[-100:]
# Update confidence metrics
self.avg_confidence = sum(self.confidence_history) / len(self.confidence_history)
self.max_confidence = max(self.max_confidence, action_confidence)
self.min_confidence = min(self.min_confidence, action_confidence)
# Log average confidence occasionally
if random.random() < 0.01: # 1% of the time
logger.info(f"Confidence metrics - Current: {action_confidence:.4f}, Avg: {self.avg_confidence:.4f}, " +
f"Min: {self.min_confidence:.4f}, Max: {self.max_confidence:.4f}")
# Track price for violent move detection
try:
# Extract current price from state (assuming it's in the last position)
if len(state.shape) > 1: # For 2D state
current_price = state[-1, -1]
else: # For 1D state
current_price = state[-1]
self.price_history.append(current_price)
if len(self.price_history) > self.volatility_window:
self.price_history = self.price_history[-self.volatility_window:]
# Detect violent price moves if we have enough price history
if len(self.price_history) >= 5:
# Calculate short-term volatility
recent_prices = self.price_history[-5:]
# Make sure we're working with scalar values, not arrays
if isinstance(recent_prices[0], np.ndarray):
# If prices are arrays, extract the last value (current price)
recent_prices = [p[-1] if isinstance(p, np.ndarray) and p.size > 0 else p for p in recent_prices]
# Calculate price changes with protection against division by zero
price_changes = []
for i in range(1, len(recent_prices)):
if recent_prices[i-1] != 0 and not np.isnan(recent_prices[i-1]) and not np.isnan(recent_prices[i]):
change = (recent_prices[i] - recent_prices[i-1]) / recent_prices[i-1]
price_changes.append(change)
else:
price_changes.append(0.0)
# Calculate volatility as sum of absolute price changes
volatility = sum([abs(change) for change in price_changes])
# Check if we've had a violent move
if volatility > self.volatility_threshold:
logger.info(f"Violent price move detected! Volatility: {volatility:.6f}")
self.post_violent_move = True
self.violent_move_cooldown = 10 # Set cooldown period
# Handle post-violent move period
if self.post_violent_move:
if self.violent_move_cooldown > 0:
self.violent_move_cooldown -= 1
# Increase confidence threshold temporarily after violent moves
effective_threshold = self.minimum_action_confidence * 1.1
logger.info(f"Post-violent move period: {self.violent_move_cooldown} steps remaining. " +
f"Using higher confidence threshold: {effective_threshold:.4f}")
else:
self.post_violent_move = False
logger.info("Post-violent move period ended")
except Exception as e:
logger.warning(f"Error in violent move detection: {str(e)}")
# Apply trade action fee to buy/sell actions but not to hold
# This creates a threshold that must be exceeded to justify a trade
action_values = action_probs.clone()
# If BUY or SELL, apply fee by reducing the Q-value
if action == 0 or action == 1: # BUY or SELL
# Check if confidence is above minimum threshold
effective_threshold = self.minimum_action_confidence
if self.post_violent_move:
effective_threshold *= 1.1 # Higher threshold after violent moves
if action_confidence < effective_threshold:
# If confidence is below threshold, force HOLD action
logger.info(f"Action {action} confidence {action_confidence:.4f} below threshold {effective_threshold}, forcing HOLD")
action = 2 # HOLD
else:
# Apply trade action fee to ensure we only trade when there's clear benefit
fee_adjusted_action_values = action_values.clone()
fee_adjusted_action_values[0, 0] -= self.trade_action_fee # Reduce BUY value
fee_adjusted_action_values[0, 1] -= self.trade_action_fee # Reduce SELL value
# Hold value remains unchanged
# Re-determine the action based on fee-adjusted values
fee_adjusted_action = fee_adjusted_action_values.argmax().item()
# If the fee changes our decision, log this
if fee_adjusted_action != action:
logger.info(f"Trade action fee changed decision from {action} to {fee_adjusted_action}")
action = fee_adjusted_action
# Adjust action based on extrema and price predictions
# Prioritize short-term movement for trading decisions
if immediate_conf > 0.8: # Only adjust for strong signals
if immediate_direction == 2: # UP prediction
# Bias toward BUY for strong up predictions
-if action != 0 and random.random() < 0.3 * immediate_conf:
+if action != 0 and action != 2 and random.random() < 0.3 * immediate_conf:
logger.info(f"Adjusting action to BUY based on immediate UP prediction")
action = 0 # BUY
elif immediate_direction == 0: # DOWN prediction
# Bias toward SELL for strong down predictions
-if action != 1 and random.random() < 0.3 * immediate_conf:
+if action != 1 and action != 2 and random.random() < 0.3 * immediate_conf:
logger.info(f"Adjusting action to SELL based on immediate DOWN prediction")
action = 1 # SELL
@@ -354,333 +501,217 @@ class DQNAgent:
if extrema_confidence > 0.8: # Only adjust for strong signals
if extrema_class == 0: # Bottom detected
# Bias toward BUY at bottoms
-if action != 0 and random.random() < 0.3 * extrema_confidence:
+if action != 0 and action != 2 and random.random() < 0.3 * extrema_confidence:
logger.info(f"Adjusting action to BUY based on bottom detection")
action = 0 # BUY
elif extrema_class == 1: # Top detected
# Bias toward SELL at tops
-if action != 1 and random.random() < 0.3 * extrema_confidence:
+if action != 1 and action != 2 and random.random() < 0.3 * extrema_confidence:
logger.info(f"Adjusting action to SELL based on top detection")
action = 1 # SELL
# Finally, avoid action oscillation by checking recent history
if len(self.recent_actions) >= 2:
last_action = self.recent_actions[-1]
if action != last_action and action != 2 and last_action != 2:
# We're switching between BUY and SELL too quickly
# Only allow this if we have very high confidence
if action_confidence < 0.85:
logger.info(f"Preventing oscillation from {last_action} to {action}, forcing HOLD")
action = 2 # HOLD
# Update recent actions list
self.recent_actions.append(action)
if len(self.recent_actions) > 5:
self.recent_actions = self.recent_actions[-5:]
return action
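To see the trade-action fee in isolation, a toy sketch with hypothetical Q-values, showing how the fee can flip a marginal BUY into HOLD:

import torch

q = torch.tensor([[0.5103, 0.2000, 0.5100]])    # BUY, SELL, HOLD (made-up values)
fee = 0.0005
adj = q.clone()
adj[0, 0] -= fee                                # penalize BUY
adj[0, 1] -= fee                                # penalize SELL; HOLD untouched
print(q.argmax().item(), adj.argmax().item())   # 0 -> 2: the fee flips BUY to HOLD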
-def replay(self, use_prioritized=True) -> float:
-    """Experience replay - learn from stored experiences
-    Args:
-        use_prioritized: Whether to use prioritized experience replay
-    Returns:
-        float: Training loss
-    """
-    # Check if we have enough samples
-    if len(self.memory) < self.batch_size:
+def replay(self, experiences=None):
+    """Train the model using experiences from memory"""
+    # Don't train if not in training mode
+    if not self.training:
        return 0.0
# Check if mixed precision should be disabled
if 'DISABLE_MIXED_PRECISION' in os.environ:
self.use_mixed_precision = False
# If no experiences provided, sample from memory
if experiences is None:
# Skip if memory is too small
if len(self.memory) < self.batch_size:
return 0.0
# Sample from memory with or without prioritization
if use_prioritized and len(self.positive_memory) > self.batch_size // 4:
# Use prioritized sampling: mix normal samples with positive reward samples
positive_batch_size = min(self.batch_size // 4, len(self.positive_memory))
regular_batch_size = self.batch_size - positive_batch_size
# Get positive examples
positive_batch = random.sample(self.positive_memory, positive_batch_size)
# Get regular examples
regular_batch = random.sample(self.memory, regular_batch_size)
# Combine batches
minibatch = positive_batch + regular_batch
else:
# Use regular uniform sampling
minibatch = random.sample(self.memory, self.batch_size)
# Sample random mini-batch from memory
indices = np.random.choice(len(self.memory), size=min(self.batch_size, len(self.memory)), replace=False)
experiences = [self.memory[i] for i in indices]
# Extract batches with proper tensor conversion
states = np.vstack([self._normalize_state(x[0]) for x in minibatch])
actions = np.array([x[1] for x in minibatch])
rewards = np.array([x[2] for x in minibatch])
next_states = np.vstack([self._normalize_state(x[3]) for x in minibatch])
dones = np.array([x[4] for x in minibatch], dtype=np.float32)
# Convert to torch tensors and move to device
states_tensor = torch.FloatTensor(states).to(self.device)
actions_tensor = torch.LongTensor(actions).to(self.device)
rewards_tensor = torch.FloatTensor(rewards).to(self.device)
next_states_tensor = torch.FloatTensor(next_states).to(self.device)
dones_tensor = torch.FloatTensor(dones).to(self.device)
-# First training step with mixed precision if available
+# Choose appropriate replay method
if self.use_mixed_precision:
-    loss = self._replay_mixed_precision(
-        states_tensor, actions_tensor, rewards_tensor,
-        next_states_tensor, dones_tensor
-    )
+    # Convert experiences to tensors for mixed precision
+    states = torch.FloatTensor(np.array([e[0] for e in experiences])).to(self.device)
+    actions = torch.LongTensor(np.array([e[1] for e in experiences])).to(self.device)
+    rewards = torch.FloatTensor(np.array([e[2] for e in experiences])).to(self.device)
+    next_states = torch.FloatTensor(np.array([e[3] for e in experiences])).to(self.device)
+    dones = torch.FloatTensor(np.array([e[4] for e in experiences])).to(self.device)
+    # Use mixed precision replay
+    loss = self._replay_mixed_precision(states, actions, rewards, next_states, dones)
else:
-    loss = self._replay_standard(
-        states_tensor, actions_tensor, rewards_tensor,
-        next_states_tensor, dones_tensor
-    )
+    # Pass experiences directly to standard replay method
+    loss = self._replay_standard(experiences)
# Training focus selector - randomly focus on one of the specialized training types
training_focus = random.random()
# Occasionally train specifically on extrema points
if training_focus < 0.3 and hasattr(self, 'extrema_memory') and len(self.extrema_memory) >= self.batch_size // 2:
# Sample from extrema memory
extrema_batch_size = min(self.batch_size // 2, len(self.extrema_memory))
extrema_batch = random.sample(self.extrema_memory, extrema_batch_size)
# Extract batches with proper tensor conversion
extrema_states = np.vstack([self._normalize_state(x[0]) for x in extrema_batch])
extrema_actions = np.array([x[1] for x in extrema_batch])
extrema_rewards = np.array([x[2] for x in extrema_batch])
extrema_next_states = np.vstack([self._normalize_state(x[3]) for x in extrema_batch])
extrema_dones = np.array([x[4] for x in extrema_batch], dtype=np.float32)
# Convert to torch tensors and move to device
extrema_states_tensor = torch.FloatTensor(extrema_states).to(self.device)
extrema_actions_tensor = torch.LongTensor(extrema_actions).to(self.device)
extrema_rewards_tensor = torch.FloatTensor(extrema_rewards).to(self.device)
extrema_next_states_tensor = torch.FloatTensor(extrema_next_states).to(self.device)
extrema_dones_tensor = torch.FloatTensor(extrema_dones).to(self.device)
# Additional training step focused on extrema points (with smaller learning rate)
original_lr = self.optimizer.param_groups[0]['lr']
# Temporarily reduce learning rate for fine-tuning on extrema
for param_group in self.optimizer.param_groups:
param_group['lr'] = original_lr * 0.5
# Train on extrema
if self.use_mixed_precision:
extrema_loss = self._replay_mixed_precision(
extrema_states_tensor, extrema_actions_tensor, extrema_rewards_tensor,
extrema_next_states_tensor, extrema_dones_tensor
)
else:
extrema_loss = self._replay_standard(
extrema_states_tensor, extrema_actions_tensor, extrema_rewards_tensor,
extrema_next_states_tensor, extrema_dones_tensor
)
# Restore original learning rate
for param_group in self.optimizer.param_groups:
param_group['lr'] = original_lr
logger.info(f"Extra training on extrema points: loss={extrema_loss:.4f}")
# Average the loss
loss = (loss + extrema_loss) / 2
# Occasionally train specifically on price movement data
elif training_focus >= 0.3 and training_focus < 0.6 and hasattr(self, 'price_movement_memory') and len(self.price_movement_memory) >= self.batch_size // 2:
# Sample from price movement memory
price_batch_size = min(self.batch_size // 2, len(self.price_movement_memory))
price_batch = random.sample(self.price_movement_memory, price_batch_size)
# Extract batches with proper tensor conversion
price_states = np.vstack([self._normalize_state(x[0]) for x in price_batch])
price_actions = np.array([x[1] for x in price_batch])
price_rewards = np.array([x[2] for x in price_batch])
price_next_states = np.vstack([self._normalize_state(x[3]) for x in price_batch])
price_dones = np.array([x[4] for x in price_batch], dtype=np.float32)
# Convert to torch tensors and move to device
price_states_tensor = torch.FloatTensor(price_states).to(self.device)
price_actions_tensor = torch.LongTensor(price_actions).to(self.device)
price_rewards_tensor = torch.FloatTensor(price_rewards).to(self.device)
price_next_states_tensor = torch.FloatTensor(price_next_states).to(self.device)
price_dones_tensor = torch.FloatTensor(price_dones).to(self.device)
# Additional training step focused on price movements (with smaller learning rate)
original_lr = self.optimizer.param_groups[0]['lr']
# Temporarily reduce learning rate
for param_group in self.optimizer.param_groups:
param_group['lr'] = original_lr * 0.5
# Train on price movement data
if self.use_mixed_precision:
price_loss = self._replay_mixed_precision(
price_states_tensor, price_actions_tensor, price_rewards_tensor,
price_next_states_tensor, price_dones_tensor
)
else:
price_loss = self._replay_standard(
price_states_tensor, price_actions_tensor, price_rewards_tensor,
price_next_states_tensor, price_dones_tensor
)
# Restore original learning rate
for param_group in self.optimizer.param_groups:
param_group['lr'] = original_lr
logger.info(f"Extra training on price movement data: loss={price_loss:.4f}")
# Average the loss
loss = (loss + price_loss) / 2
-# Store and return loss
+# Store loss for monitoring
self.losses.append(loss)
return loss
def _replay_standard(self, states, actions, rewards, next_states, dones):
"""Standard precision training step"""
# Zero gradients
self.optimizer.zero_grad()
# Get current Q values and extrema predictions
current_q_values, current_extrema_pred, current_price_pred = self.policy_net(states)
current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
# Get next Q values from target network
with torch.no_grad():
next_q_values, next_extrema_pred, next_price_pred = self.target_net(next_states)
next_q_values = next_q_values.max(1)[0]
# Check for dimension mismatch and fix it
if rewards.shape[0] != next_q_values.shape[0]:
# Log the shape mismatch for debugging
logger.warning(f"Shape mismatch detected in standard replay: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
# Use the smaller size to prevent index errors
min_size = min(rewards.shape[0], next_q_values.shape[0])
rewards = rewards[:min_size]
dones = dones[:min_size]
next_q_values = next_q_values[:min_size]
current_q_values = current_q_values[:min_size]
target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
# Compute Q-value loss (primary task)
q_loss = nn.MSELoss()(current_q_values, target_q_values)
# Initialize combined loss with Q-value loss
loss = q_loss
# Try to extract price from current and next states
try:
# Extract price feature from sequence data (if available)
if len(states.shape) == 3: # [batch, seq, features]
current_prices = states[:, -1, -1] # Last timestep, last feature
next_prices = next_states[:, -1, -1]
else: # [batch, features]
current_prices = states[:, -1] # Last feature
next_prices = next_states[:, -1]
# Compute price changes for different timeframes
immediate_changes = (next_prices - current_prices) / current_prices
# Create price direction labels - simplified for training
# 0 = down, 1 = sideways, 2 = up
immediate_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 1 # Default: sideways
midterm_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 1
longterm_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 1
# Immediate term direction (1s, 1m)
immediate_up = (immediate_changes > 0.0005)
immediate_down = (immediate_changes < -0.0005)
immediate_labels[immediate_up] = 2 # Up
immediate_labels[immediate_down] = 0 # Down
# For mid and long term, we can only approximate during training
# In a real system, we'd need historical data to validate these
# Here we'll use the immediate term with increasing thresholds as approximation
# Mid-term (1h) - use slightly higher threshold
midterm_up = (immediate_changes > 0.001)
midterm_down = (immediate_changes < -0.001)
midterm_labels[midterm_up] = 2 # Up
midterm_labels[midterm_down] = 0 # Down
# Long-term (1d) - use even higher threshold
longterm_up = (immediate_changes > 0.002)
longterm_down = (immediate_changes < -0.002)
longterm_labels[longterm_up] = 2 # Up
longterm_labels[longterm_down] = 0 # Down
# Generate target values for price change regression
# For simplicity, we'll use the immediate change and scaled versions for longer timeframes
price_value_targets = torch.zeros((min_size, 4), device=self.device)
price_value_targets[:, 0] = immediate_changes
price_value_targets[:, 1] = immediate_changes * 2.0 # Approximate 1h change
price_value_targets[:, 2] = immediate_changes * 4.0 # Approximate 1d change
price_value_targets[:, 3] = immediate_changes * 6.0 # Approximate 1w change
# Calculate loss for price direction prediction (classification)
if len(current_price_pred['immediate'].shape) > 1 and current_price_pred['immediate'].shape[0] >= min_size:
# Slice predictions to match the adjusted batch size
immediate_pred = current_price_pred['immediate'][:min_size]
midterm_pred = current_price_pred['midterm'][:min_size]
longterm_pred = current_price_pred['longterm'][:min_size]
price_values_pred = current_price_pred['values'][:min_size]
# Compute losses for each task
immediate_loss = nn.CrossEntropyLoss()(immediate_pred, immediate_labels)
midterm_loss = nn.CrossEntropyLoss()(midterm_pred, midterm_labels)
longterm_loss = nn.CrossEntropyLoss()(longterm_pred, longterm_labels)
# MSE loss for price value regression
price_value_loss = nn.MSELoss()(price_values_pred, price_value_targets)
# Combine all price prediction losses
price_loss = immediate_loss + 0.7 * midterm_loss + 0.5 * longterm_loss + 0.3 * price_value_loss
# Create extrema labels (same as before)
extrema_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 2 # Default: neither
# Identify potential bottoms (significant negative change)
bottoms = (immediate_changes < -0.003)
extrema_labels[bottoms] = 0
# Identify potential tops (significant positive change)
tops = (immediate_changes > 0.003)
extrema_labels[tops] = 1
# Calculate extrema prediction loss
if len(current_extrema_pred.shape) > 1 and current_extrema_pred.shape[0] >= min_size:
current_extrema_pred = current_extrema_pred[:min_size]
extrema_loss = nn.CrossEntropyLoss()(current_extrema_pred, extrema_labels)
# Combined loss with all components
# Primary task: Q-value learning (RL objective)
# Secondary tasks: extrema detection and price prediction (supervised objectives)
loss = q_loss + 0.3 * extrema_loss + 0.3 * price_loss
# Log loss components occasionally
if random.random() < 0.01: # Log 1% of the time
logger.info(
f"Training losses: Q-loss={q_loss.item():.4f}, "
f"Extrema-loss={extrema_loss.item():.4f}, "
f"Price-loss={price_loss.item():.4f}, "
f"Imm-loss={immediate_loss.item():.4f}, "
f"Mid-loss={midterm_loss.item():.4f}, "
f"Long-loss={longterm_loss.item():.4f}"
)
except Exception as e:
# Fallback if price extraction fails
logger.warning(f"Failed to calculate price prediction loss: {str(e)}. Using only Q-value loss.")
# Just use Q-value loss
loss = q_loss
# Backward pass and optimize
loss.backward()
# Gradient clipping to prevent exploding gradients
torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
self.optimizer.step()
# Update target network if needed
self.update_count += 1
if self.update_count % self.target_update == 0:
self.target_net.load_state_dict(self.policy_net.state_dict())
# Track and decay epsilon
self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
return loss.item()
# Randomly decide if we should train on extrema points from special memory
if random.random() < 0.3 and len(self.extrema_memory) >= self.batch_size:
# Train specifically on extrema memory examples
extrema_indices = np.random.choice(len(self.extrema_memory), size=min(self.batch_size, len(self.extrema_memory)), replace=False)
extrema_batch = [self.extrema_memory[i] for i in extrema_indices]
# Extract tensors from extrema batch
extrema_states = torch.FloatTensor(np.array([e[0] for e in extrema_batch])).to(self.device)
extrema_actions = torch.LongTensor(np.array([e[1] for e in extrema_batch])).to(self.device)
extrema_rewards = torch.FloatTensor(np.array([e[2] for e in extrema_batch])).to(self.device)
extrema_next_states = torch.FloatTensor(np.array([e[3] for e in extrema_batch])).to(self.device)
extrema_dones = torch.FloatTensor(np.array([e[4] for e in extrema_batch])).to(self.device)
# Use a slightly reduced learning rate for extrema training
old_lr = self.optimizer.param_groups[0]['lr']
self.optimizer.param_groups[0]['lr'] = old_lr * 0.8
# Train on extrema memory
if self.use_mixed_precision:
extrema_loss = self._replay_mixed_precision(extrema_states, extrema_actions, extrema_rewards, extrema_next_states, extrema_dones)
else:
extrema_loss = self._replay_standard(extrema_batch)
# Reset learning rate
self.optimizer.param_groups[0]['lr'] = old_lr
# Log extrema loss
logger.info(f"Extra training on extrema points, loss: {extrema_loss:.4f}")
# Randomly train on price movement examples (similar to extrema)
if random.random() < 0.3 and len(self.price_movement_memory) >= self.batch_size:
# Train specifically on price movement memory examples
price_indices = np.random.choice(len(self.price_movement_memory), size=min(self.batch_size, len(self.price_movement_memory)), replace=False)
price_batch = [self.price_movement_memory[i] for i in price_indices]
# Extract tensors from price movement batch
price_states = torch.FloatTensor(np.array([e[0] for e in price_batch])).to(self.device)
price_actions = torch.LongTensor(np.array([e[1] for e in price_batch])).to(self.device)
price_rewards = torch.FloatTensor(np.array([e[2] for e in price_batch])).to(self.device)
price_next_states = torch.FloatTensor(np.array([e[3] for e in price_batch])).to(self.device)
price_dones = torch.FloatTensor(np.array([e[4] for e in price_batch])).to(self.device)
# Use a slightly reduced learning rate for price movement training
old_lr = self.optimizer.param_groups[0]['lr']
self.optimizer.param_groups[0]['lr'] = old_lr * 0.75
# Train on price movement memory
if self.use_mixed_precision:
price_loss = self._replay_mixed_precision(price_states, price_actions, price_rewards, price_next_states, price_dones)
else:
price_loss = self._replay_standard(price_batch)
# Reset learning rate
self.optimizer.param_groups[0]['lr'] = old_lr
# Log price movement loss
logger.info(f"Extra training on price movement examples, loss: {price_loss:.4f}")
return loss
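The temporary learning-rate reduction used for the extrema and price-movement passes reduces to this pattern; a minimal sketch with a throwaway optimizer:

import torch

opt = torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=1e-3)
old_lr = opt.param_groups[0]['lr']
opt.param_groups[0]['lr'] = old_lr * 0.8   # temporarily train at a reduced rate
# ... the specialized replay step(s) would run here ...
opt.param_groups[0]['lr'] = old_lr         # restore the base learning rate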
def _replay_standard(self, experiences=None):
"""Standard training step without mixed precision"""
try:
# Use experiences if provided, otherwise sample from memory
if experiences is None:
# If memory is too small, skip training
if len(self.memory) < self.batch_size:
return 0.0
# Sample random mini-batch from memory
indices = np.random.choice(len(self.memory), size=min(self.batch_size, len(self.memory)), replace=False)
batch = [self.memory[i] for i in indices]
experiences = batch
# Unpack experiences
states, actions, rewards, next_states, dones = zip(*experiences)
# Convert to PyTorch tensors
states = torch.FloatTensor(np.array(states)).to(self.device)
actions = torch.LongTensor(np.array(actions)).to(self.device)
rewards = torch.FloatTensor(np.array(rewards)).to(self.device)
next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
dones = torch.FloatTensor(np.array(dones)).to(self.device)
# Get current Q values
current_q_values, current_extrema_pred, current_price_pred, hidden_features = self.policy_net(states)
current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
# Get next Q values with target network
with torch.no_grad():
next_q_values, next_extrema_pred, next_price_pred, next_hidden_features = self.target_net(next_states)
next_q_values = next_q_values.max(1)[0]
# Check for dimension mismatch between rewards and next_q_values
if rewards.shape[0] != next_q_values.shape[0]:
logger.warning(f"Shape mismatch detected in standard replay: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
# Use the smaller size to prevent index error
min_size = min(rewards.shape[0], next_q_values.shape[0])
rewards = rewards[:min_size]
dones = dones[:min_size]
next_q_values = next_q_values[:min_size]
current_q_values = current_q_values[:min_size]
# Calculate target Q values
target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
# Compute loss for Q value
q_loss = self.criterion(current_q_values, target_q_values)
# Try to compute extrema loss if possible
try:
# Get the target classes from extrema predictions
extrema_targets = torch.argmax(current_extrema_pred, dim=1).long()
# Compute extrema loss using cross-entropy - this is an auxiliary task
extrema_loss = F.cross_entropy(current_extrema_pred, extrema_targets)
# Combined loss with emphasis on Q-learning
total_loss = q_loss + 0.1 * extrema_loss
except Exception as e:
logger.warning(f"Failed to calculate extrema loss: {str(e)}. Using only Q-value loss.")
total_loss = q_loss
# Reset gradients
self.optimizer.zero_grad()
# Backward pass
total_loss.backward()
# Clip gradients to avoid exploding gradients
torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
# Update weights
self.optimizer.step()
# Update target network if needed
self.update_count += 1
if self.update_count % self.target_update == 0:
self.target_net.load_state_dict(self.policy_net.state_dict())
# Return loss
return total_loss.item()
except Exception as e:
logger.error(f"Error in replay standard: {str(e)}")
import traceback
logger.error(traceback.format_exc())
return 0.0
def _replay_mixed_precision(self, states, actions, rewards, next_states, dones):
"""Mixed precision training step for better GPU performance"""
@@ -696,12 +727,12 @@ class DQNAgent:
# Forward pass with amp autocasting
with torch.cuda.amp.autocast():
# Get current Q values and extrema predictions
-current_q_values, current_extrema_pred, current_price_pred = self.policy_net(states)
+current_q_values, current_extrema_pred, current_price_pred, hidden_features = self.policy_net(states)
current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
# Get next Q values from target network
with torch.no_grad():
-next_q_values, next_extrema_pred, next_price_pred = self.target_net(next_states)
+next_q_values, next_extrema_pred, next_price_pred, next_hidden_features = self.target_net(next_states)
next_q_values = next_q_values.max(1)[0]
# Check for dimension mismatch and fix it
@@ -733,7 +764,7 @@ class DQNAgent:
current_prices = states[:, -1] # Last feature
next_prices = next_states[:, -1]
-# Compute price changes for different timeframes
+# Calculate price change for different timeframes
immediate_changes = (next_prices - current_prices) / current_prices
# Create price direction labels - simplified for training
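For reference, the three-way direction labeling applied to immediate_changes (0=down, 1=sideways, 2=up) works like this; a standalone sketch with made-up changes:

import torch

changes = torch.tensor([0.0012, -0.0001, -0.0030])
labels = torch.ones(changes.shape[0], dtype=torch.long)  # default: sideways (1)
labels[changes > 0.0005] = 2                             # up
labels[changes < -0.0005] = 0                            # down
print(labels)                                            # tensor([2, 1, 0])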


@@ -0,0 +1,329 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import random
from typing import Tuple, List
import os
import sys
import logging
import torch.nn.functional as F
# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
# Import the EnhancedCNN model
from NN.models.enhanced_cnn import EnhancedCNN, ExampleSiftingDataset
# Configure logger
logger = logging.getLogger(__name__)
class EnhancedDQNAgent:
"""
Enhanced Deep Q-Network agent for trading
Uses the improved EnhancedCNN model with residual connections and attention mechanisms
"""
def __init__(self,
state_shape: Tuple[int, ...],
n_actions: int,
learning_rate: float = 0.0003, # Slightly reduced learning rate for stability
gamma: float = 0.95, # Discount factor
epsilon: float = 1.0,
epsilon_min: float = 0.05,
epsilon_decay: float = 0.995, # Slower decay for more exploration
buffer_size: int = 50000, # Larger memory buffer
batch_size: int = 128, # Larger batch size
target_update: int = 10, # More frequent target updates
confidence_threshold: float = 0.4, # Lower confidence threshold
device=None):
# Extract state dimensions
if isinstance(state_shape, tuple) and len(state_shape) > 1:
# Multi-dimensional state (like image or sequence)
self.state_dim = state_shape
else:
# 1D state
if isinstance(state_shape, tuple):
self.state_dim = state_shape[0]
else:
self.state_dim = state_shape
# Store parameters
self.n_actions = n_actions
self.learning_rate = learning_rate
self.gamma = gamma
self.epsilon = epsilon
self.epsilon_min = epsilon_min
self.epsilon_decay = epsilon_decay
self.buffer_size = buffer_size
self.batch_size = batch_size
self.target_update = target_update
self.confidence_threshold = confidence_threshold
# Set device for computation
if device is None:
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
self.device = device
# Initialize models with the enhanced CNN
self.policy_net = EnhancedCNN(self.state_dim, self.n_actions, self.confidence_threshold)
self.target_net = EnhancedCNN(self.state_dim, self.n_actions, self.confidence_threshold)
# Initialize the target network with the same weights as the policy network
self.target_net.load_state_dict(self.policy_net.state_dict())
# Set models to eval mode (important for batch norm, dropout)
self.target_net.eval()
# Optimization components
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=self.learning_rate)
self.criterion = nn.MSELoss()
# Experience replay memory with example sifting
self.memory = ExampleSiftingDataset(max_examples=buffer_size)
self.update_count = 0
# Confidence tracking
self.confidence_history = []
self.avg_confidence = 0.0
self.max_confidence = 0.0
self.min_confidence = 1.0
# Performance tracking
self.losses = []
self.rewards = []
self.avg_reward = 0.0
# Check if mixed precision training should be used
self.use_mixed_precision = False
if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
self.use_mixed_precision = True
self.scaler = torch.cuda.amp.GradScaler()
logger.info("Mixed precision training enabled")
else:
logger.info("Mixed precision training disabled")
# For compatibility with old code
self.action_size = n_actions
logger.info(f"Enhanced DQN Agent using device: {self.device}")
logger.info(f"Confidence threshold set to {self.confidence_threshold}")
def move_models_to_device(self, device=None):
"""Move models to the specified device (GPU/CPU)"""
if device is not None:
self.device = device
try:
self.policy_net = self.policy_net.to(self.device)
self.target_net = self.target_net.to(self.device)
logger.info(f"Moved models to {self.device}")
return True
except Exception as e:
logger.error(f"Failed to move models to {self.device}: {str(e)}")
return False
def _normalize_state(self, state):
"""Normalize state for better training stability"""
try:
# Convert to numpy array if needed
if isinstance(state, list):
state = np.array(state, dtype=np.float32)
# Apply normalization based on state shape
if len(state.shape) > 1:
# Multi-dimensional state - normalize each feature dimension separately
for i in range(state.shape[0]):
# Skip if all zeros (to avoid division by zero)
if np.sum(np.abs(state[i])) > 0:
# Standardize each feature dimension
mean = np.mean(state[i])
std = np.std(state[i])
if std > 0:
state[i] = (state[i] - mean) / std
else:
# 1D state vector
# Skip if all zeros
if np.sum(np.abs(state)) > 0:
mean = np.mean(state)
std = np.std(state)
if std > 0:
state = (state - mean) / std
return state
except Exception as e:
logger.warning(f"Error normalizing state: {str(e)}")
return state
def remember(self, state, action, reward, next_state, done):
"""Store experience in memory with example sifting"""
self.memory.add_example(state, action, reward, next_state, done)
# Also track rewards for monitoring
self.rewards.append(reward)
if len(self.rewards) > 100:
self.rewards = self.rewards[-100:]
self.avg_reward = np.mean(self.rewards)
def act(self, state, explore=True):
"""Choose action using epsilon-greedy policy with built-in confidence thresholding"""
if explore and random.random() < self.epsilon:
return random.randrange(self.n_actions), 0.0 # Return action and zero confidence
# Normalize state before inference
normalized_state = self._normalize_state(state)
# Use the EnhancedCNN's act method which includes confidence thresholding
action, confidence = self.policy_net.act(normalized_state, explore=explore)
# Track confidence metrics
self.confidence_history.append(confidence)
if len(self.confidence_history) > 100:
self.confidence_history = self.confidence_history[-100:]
# Update confidence metrics
self.avg_confidence = sum(self.confidence_history) / len(self.confidence_history)
self.max_confidence = max(self.max_confidence, confidence)
self.min_confidence = min(self.min_confidence, confidence)
# Log average confidence occasionally
if random.random() < 0.01: # 1% of the time
logger.info(f"Confidence metrics - Current: {confidence:.4f}, Avg: {self.avg_confidence:.4f}, " +
f"Min: {self.min_confidence:.4f}, Max: {self.max_confidence:.4f}")
return action, confidence
def replay(self):
"""Train the model using experience replay with high-quality examples"""
# Check if enough samples in memory
if len(self.memory) < self.batch_size:
return 0.0
# Get batch of experiences
batch = self.memory.get_batch(self.batch_size)
if batch is None:
return 0.0
states = torch.FloatTensor(batch['states']).to(self.device)
actions = torch.LongTensor(batch['actions']).to(self.device)
rewards = torch.FloatTensor(batch['rewards']).to(self.device)
next_states = torch.FloatTensor(batch['next_states']).to(self.device)
dones = torch.FloatTensor(batch['dones']).to(self.device)
# Compute Q values
self.policy_net.train() # Set to training mode
# Get current Q values
if self.use_mixed_precision:
with torch.cuda.amp.autocast():
# Get current Q values
q_values, _, _, _ = self.policy_net(states)
current_q = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
# Compute target Q values
with torch.no_grad():
self.target_net.eval()
next_q_values, _, _, _ = self.target_net(next_states)
next_q = next_q_values.max(1)[0]
target_q = rewards + (1 - dones) * self.gamma * next_q
# Compute loss
loss = self.criterion(current_q, target_q)
# Perform backpropagation with mixed precision
self.optimizer.zero_grad()
self.scaler.scale(loss).backward()
self.scaler.unscale_(self.optimizer)
torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
self.scaler.step(self.optimizer)
self.scaler.update()
else:
# Standard precision training
# Get current Q values
q_values, _, _, _ = self.policy_net(states)
current_q = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
# Compute target Q values
with torch.no_grad():
self.target_net.eval()
next_q_values, _, _, _ = self.target_net(next_states)
next_q = next_q_values.max(1)[0]
target_q = rewards + (1 - dones) * self.gamma * next_q
# Compute loss
loss = self.criterion(current_q, target_q)
# Perform backpropagation
self.optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
self.optimizer.step()
# Track loss
loss_value = loss.item()
self.losses.append(loss_value)
if len(self.losses) > 100:
self.losses = self.losses[-100:]
# Update target network
self.update_count += 1
if self.update_count % self.target_update == 0:
self.target_net.load_state_dict(self.policy_net.state_dict())
logger.info(f"Updated target network (step {self.update_count})")
# Decay epsilon
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
return loss_value
def save(self, path):
"""Save agent state and models"""
self.policy_net.save(f"{path}_policy")
self.target_net.save(f"{path}_target")
# Save agent state
torch.save({
'epsilon': self.epsilon,
'confidence_threshold': self.confidence_threshold,
'losses': self.losses,
'rewards': self.rewards,
'avg_reward': self.avg_reward,
'confidence_history': self.confidence_history,
'avg_confidence': self.avg_confidence,
'max_confidence': self.max_confidence,
'min_confidence': self.min_confidence,
'update_count': self.update_count
}, f"{path}_agent_state.pt")
logger.info(f"Agent state saved to {path}_agent_state.pt")
def load(self, path):
"""Load agent state and models"""
policy_loaded = self.policy_net.load(f"{path}_policy")
target_loaded = self.target_net.load(f"{path}_target")
# Load agent state if available
agent_state_path = f"{path}_agent_state.pt"
if os.path.exists(agent_state_path):
try:
state = torch.load(agent_state_path)
self.epsilon = state.get('epsilon', self.epsilon)
self.confidence_threshold = state.get('confidence_threshold', self.confidence_threshold)
self.policy_net.confidence_threshold = self.confidence_threshold
self.target_net.confidence_threshold = self.confidence_threshold
self.losses = state.get('losses', [])
self.rewards = state.get('rewards', [])
self.avg_reward = state.get('avg_reward', 0.0)
self.confidence_history = state.get('confidence_history', [])
self.avg_confidence = state.get('avg_confidence', 0.0)
self.max_confidence = state.get('max_confidence', 0.0)
self.min_confidence = state.get('min_confidence', 1.0)
self.update_count = state.get('update_count', 0)
logger.info(f"Agent state loaded from {agent_state_path}")
except Exception as e:
logger.error(f"Error loading agent state: {str(e)}")
return policy_loaded and target_loaded
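For context, one hypothetical way to drive the agent; ToyEnv and its reset/step signature are illustrative assumptions, not part of this commit:

import numpy as np

class ToyEnv:
    """Minimal stand-in environment (pure illustration)."""
    def reset(self):
        return np.random.randn(64).astype(np.float32)
    def step(self, action):
        # next_state, reward, done
        return np.random.randn(64).astype(np.float32), float(np.random.randn() * 0.01), False

agent = EnhancedDQNAgent(state_shape=(64,), n_actions=3)
env = ToyEnv()
state = env.reset()
for _ in range(500):
    action, confidence = agent.act(state)
    next_state, reward, done = env.step(action)
    agent.remember(state, action, reward, next_state, done)
    loss = agent.replay()   # trains once enough examples survive sifting
    state = next_state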

NN/models/enhanced_cnn.py (new file, 413 lines)

@@ -0,0 +1,413 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class ResidualBlock(nn.Module):
"""
Residual block with pre-activation (BatchNorm -> ReLU -> Conv)
"""
def __init__(self, in_channels, out_channels, stride=1):
super(ResidualBlock, self).__init__()
self.bn1 = nn.BatchNorm1d(in_channels)
self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm1d(out_channels)
self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
# Shortcut connection to match dimensions
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels:
self.shortcut = nn.Sequential(
nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
)
def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out)
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
out += shortcut
return out
class SelfAttention(nn.Module):
"""
Self-attention mechanism for sequential data
"""
def __init__(self, dim):
super(SelfAttention, self).__init__()
self.query = nn.Linear(dim, dim)
self.key = nn.Linear(dim, dim)
self.value = nn.Linear(dim, dim)
self.scale = torch.sqrt(torch.tensor(dim, dtype=torch.float32))
def forward(self, x):
# x shape: [batch_size, seq_len, dim]
batch_size, seq_len, dim = x.size()
q = self.query(x) # [batch_size, seq_len, dim]
k = self.key(x) # [batch_size, seq_len, dim]
v = self.value(x) # [batch_size, seq_len, dim]
# Calculate attention scores
scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale # [batch_size, seq_len, seq_len]
# Apply softmax to get attention weights
attention = F.softmax(scores, dim=-1) # [batch_size, seq_len, seq_len]
# Apply attention to values
out = torch.matmul(attention, v) # [batch_size, seq_len, dim]
return out, attention
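A quick shape check for the attention block (standalone sketch; EnhancedCNN.forward below feeds it a length-1 sequence):

import torch

attn = SelfAttention(dim=256)
x = torch.randn(4, 1, 256)        # [batch, seq_len, dim]
out, weights = attn(x)
print(out.shape, weights.shape)   # torch.Size([4, 1, 256]) torch.Size([4, 1, 1])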
class EnhancedCNN(nn.Module):
"""
Enhanced CNN model with residual connections and attention mechanisms
for improved trading decision making
"""
def __init__(self, input_shape, n_actions, confidence_threshold=0.5):
super(EnhancedCNN, self).__init__()
# Store dimensions
self.input_shape = input_shape
self.n_actions = n_actions
self.confidence_threshold = confidence_threshold
# Calculate input dimensions
if isinstance(input_shape, (list, tuple)):
if len(input_shape) == 3: # [channels, height, width]
self.channels, self.height, self.width = input_shape
self.feature_dim = self.height * self.width
elif len(input_shape) == 2: # [timeframes, features]
self.channels = input_shape[0]
self.features = input_shape[1]
self.feature_dim = self.features * self.channels
elif len(input_shape) == 1: # [features]
self.channels = 1
self.features = input_shape[0]
self.feature_dim = self.features
else:
raise ValueError(f"Unsupported input shape: {input_shape}")
else: # single integer
self.channels = 1
self.features = input_shape
self.feature_dim = input_shape
# Build network
self._build_network()
# Initialize device
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.to(self.device)
logger.info(f"EnhancedCNN initialized with input shape: {input_shape}, actions: {n_actions}")
def _build_network(self):
"""Build the enhanced neural network with current feature dimensions"""
# 1D CNN for sequential data
if self.channels > 1:
# Reshape expected: [batch, timeframes, features]
self.conv_layers = nn.Sequential(
nn.Conv1d(self.channels, 64, kernel_size=3, padding=1),
nn.BatchNorm1d(64),
nn.ReLU(),
nn.Dropout(0.2),
ResidualBlock(64, 128),
nn.MaxPool1d(kernel_size=2, stride=2),
nn.Dropout(0.3),
ResidualBlock(128, 256),
nn.MaxPool1d(kernel_size=2, stride=2),
nn.Dropout(0.4),
ResidualBlock(256, 512),
nn.AdaptiveAvgPool1d(1) # Global average pooling
)
# Feature dimension after conv layers
self.conv_features = 512
else:
# For 1D vectors, skip the convolutional part
self.conv_layers = None
self.conv_features = 0
# Fully connected layers for all cases
# We'll use deeper layers with skip connections
if self.conv_layers is None:
# For 1D inputs without conv preprocessing
self.fc1 = nn.Linear(self.feature_dim, 512)
self.features_dim = 512
else:
# For data processed by conv layers
self.fc1 = nn.Linear(self.conv_features, 512)
self.features_dim = 512
# Common feature extraction layers
self.fc_layers = nn.Sequential(
self.fc1,
nn.ReLU(),
nn.Dropout(0.4),
nn.Linear(512, 512),
nn.ReLU(),
nn.Dropout(0.4),
nn.Linear(512, 256),
nn.ReLU()
)
# Dueling architecture
self.advantage_stream = nn.Sequential(
nn.Linear(256, 128),
nn.ReLU(),
nn.Linear(128, self.n_actions)
)
self.value_stream = nn.Sequential(
nn.Linear(256, 128),
nn.ReLU(),
nn.Linear(128, 1)
)
# Extrema detection head with increased capacity
self.extrema_head = nn.Sequential(
nn.Linear(256, 128),
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(128, 3) # 0=bottom, 1=top, 2=neither
)
# Price prediction heads with increased capacity
self.price_pred_immediate = nn.Sequential(
nn.Linear(256, 64),
nn.ReLU(),
nn.Linear(64, 3) # Up, Down, Sideways
)
self.price_pred_midterm = nn.Sequential(
nn.Linear(256, 64),
nn.ReLU(),
nn.Linear(64, 3) # Up, Down, Sideways
)
self.price_pred_longterm = nn.Sequential(
nn.Linear(256, 64),
nn.ReLU(),
nn.Linear(64, 3) # Up, Down, Sideways
)
# Value prediction with increased capacity
self.price_pred_value = nn.Sequential(
nn.Linear(256, 128),
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(128, 4) # % change for different timeframes
)
# Additional attention layer for feature refinement
self.attention = SelfAttention(256)
def _check_rebuild_network(self, features):
"""Check if network needs to be rebuilt for different feature dimensions"""
if features != self.feature_dim:
logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
self.feature_dim = features
self._build_network()
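# Note: _build_network() recreates every layer, so a rebuild discards
# previously learned weights; load() below relies on rebuilding
# *before* load_state_dict for exactly this reason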
# Move to device after rebuilding
self.to(self.device)
return True
return False
def forward(self, x):
"""Forward pass through the network"""
batch_size = x.size(0)
# Process different input shapes
if len(x.shape) > 2:
# Handle 3D input [batch, timeframes, features]
if self.conv_layers is not None:
# Input is already [batch, timeframes, features]; Conv1d treats the
# timeframe dimension as channels, so no reshape is needed here
x_reshaped = x
# Check if the feature dimension has changed and rebuild if necessary
if x_reshaped.size(1) * x_reshaped.size(2) != self.feature_dim:
total_features = x_reshaped.size(1) * x_reshaped.size(2)
self._check_rebuild_network(total_features)
# Apply convolutions
x_conv = self.conv_layers(x_reshaped)
# Flatten: [batch, channels, 1] -> [batch, channels]
x_flat = x_conv.view(batch_size, -1)
else:
# If no conv layers, just flatten
x_flat = x.view(batch_size, -1)
else:
# For 2D input [batch, features]
x_flat = x
# Check if dimensions have changed
if x_flat.size(1) != self.feature_dim:
self._check_rebuild_network(x_flat.size(1))
# Apply FC layers
features = self.fc_layers(x_flat)
# Add attention for feature refinement
features_3d = features.unsqueeze(1) # [batch, 1, features]
features_attended, _ = self.attention(features_3d)
features_refined = features_attended.squeeze(1) # [batch, features]
# Calculate advantage and value
advantage = self.advantage_stream(features_refined)
value = self.value_stream(features_refined)
# Combine for Q-values (Dueling architecture)
q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
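# Subtracting the advantage mean keeps V and A identifiable: shifting all
# advantages by a constant would otherwise leave the Q-values unchanged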
# Get extrema predictions
extrema_pred = self.extrema_head(features_refined)
# Price movement predictions
price_immediate = self.price_pred_immediate(features_refined)
price_midterm = self.price_pred_midterm(features_refined)
price_longterm = self.price_pred_longterm(features_refined)
price_values = self.price_pred_value(features_refined)
# Package price predictions
price_predictions = {
'immediate': price_immediate,
'midterm': price_midterm,
'longterm': price_longterm,
'values': price_values
}
return q_values, extrema_pred, price_predictions, features_refined
def act(self, state, explore=True):
"""
Choose action based on state with confidence thresholding
"""
state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
with torch.no_grad():
q_values, _, _, _ = self(state_tensor)
# Apply softmax to get action probabilities
action_probs = F.softmax(q_values, dim=1)
# Get action with highest probability
action = action_probs.argmax(dim=1).item()
action_confidence = action_probs[0, action].item()
# Check if confidence exceeds threshold
if action_confidence < self.confidence_threshold:
    # Log the original low-confidence action before overwriting it
    logger.info(f"Action {action} confidence {action_confidence:.4f} below threshold {self.confidence_threshold}, forcing HOLD")
    action = 2  # Force HOLD (assumed to be action 2)
return action, action_confidence
def save(self, path):
"""Save model weights and architecture"""
os.makedirs(os.path.dirname(path), exist_ok=True)
torch.save({
'state_dict': self.state_dict(),
'input_shape': self.input_shape,
'n_actions': self.n_actions,
'feature_dim': self.feature_dim,
'confidence_threshold': self.confidence_threshold
}, f"{path}.pt")
logger.info(f"Enhanced CNN model saved to {path}.pt")
def load(self, path):
"""Load model weights and architecture"""
try:
checkpoint = torch.load(f"{path}.pt", map_location=self.device)
self.input_shape = checkpoint['input_shape']
self.n_actions = checkpoint['n_actions']
self.feature_dim = checkpoint['feature_dim']
if 'confidence_threshold' in checkpoint:
self.confidence_threshold = checkpoint['confidence_threshold']
self._build_network()
self.load_state_dict(checkpoint['state_dict'])
self.to(self.device)
logger.info(f"Enhanced CNN model loaded from {path}.pt")
return True
except Exception as e:
logger.error(f"Error loading model: {str(e)}")
return False
# Additional utility for example sifting
class ExampleSiftingDataset:
"""
Dataset that selectively keeps high-quality examples for training
to improve model performance
"""
def __init__(self, max_examples=50000):
self.examples = []
self.labels = []
self.rewards = []
self.max_examples = max_examples
self.min_reward_threshold = -0.05 # Minimum reward to keep an example
def add_example(self, state, action, reward, next_state, done):
"""Add a new training example with reward-based filtering"""
# Only keep examples with rewards above the threshold
if reward > self.min_reward_threshold:
self.examples.append((state, action, reward, next_state, done))
self.rewards.append(reward)
# Sort by reward and keep only the top examples
if len(self.examples) > self.max_examples:
# Sort by reward (highest first)
sorted_indices = np.argsort(self.rewards)[::-1]
# Keep top examples
self.examples = [self.examples[i] for i in sorted_indices[:self.max_examples]]
self.rewards = [self.rewards[i] for i in sorted_indices[:self.max_examples]]
# Update the minimum reward threshold to be the minimum in our kept examples
self.min_reward_threshold = min(self.rewards)
def get_batch(self, batch_size):
"""Get a batch of examples, prioritizing better examples"""
if not self.examples:
return None
# Calculate selection probabilities based on rewards
rewards = np.array(self.rewards)
# Shift rewards to be positive for probability calculation
min_reward = min(rewards)
shifted_rewards = rewards - min_reward + 0.1 # Add small constant
probs = shifted_rewards / shifted_rewards.sum()
# Sample batch indices with reward-based probabilities
indices = np.random.choice(
len(self.examples),
size=min(batch_size, len(self.examples)),
p=probs,
replace=False
)
# Create batch
batch = [self.examples[i] for i in indices]
states, actions, rewards, next_states, dones = zip(*batch)
return {
'states': np.array(states),
'actions': np.array(actions),
'rewards': np.array(rewards),
'next_states': np.array(next_states),
'dones': np.array(dones)
}
def __len__(self):
return len(self.examples)
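Exercising the sifting buffer with toy experiences (standalone sketch; the reward values are made up):

import numpy as np

ds = ExampleSiftingDataset(max_examples=100)
for r in [-0.2, 0.01, 0.5, -0.04]:               # -0.2 falls below the -0.05 cutoff
    ds.add_example(np.zeros(8), 1, r, np.zeros(8), False)
print(len(ds))                                    # 3 (the -0.2 example was rejected)
batch = ds.get_batch(2)
print(batch['rewards'])                           # higher-reward examples are sampled more often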


@@ -0,0 +1 @@
{"best_reward": 4791516.572471984, "best_episode": 3250, "best_pnl": 826842167451289.1, "best_win_rate": 0.47368421052631576, "date": "2025-04-01 10:19:16"}


@@ -0,0 +1,20 @@
{
"supervised": {
"epochs_completed": 22650,
"best_val_pnl": 0.0,
"best_epoch": 50,
"best_win_rate": 0
},
"reinforcement": {
"episodes_completed": 0,
"best_reward": -Infinity,
"best_episode": 0,
"best_win_rate": 0
},
"hybrid": {
"iterations_completed": 453,
"best_combined_score": 0.0,
"training_started": "2025-04-09T10:30:42.510856",
"last_update": "2025-04-09T10:40:02.217840"
}
}
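Note: -Infinity is what Python's json module emits for float('-inf') with its default allow_nan=True; strict JSON parsers will reject this file.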


@@ -0,0 +1,326 @@
{
"epochs_completed": 8,
"best_val_pnl": 0.0,
"best_epoch": 1,
"best_win_rate": 0.0,
"training_started": "2025-04-02T10:43:58.946682",
"last_update": "2025-04-02T10:44:10.940892",
"epochs": [
{
"epoch": 1,
"train_loss": 1.0950355529785156,
"val_loss": 1.1657923062642415,
"train_acc": 0.3255208333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:01.840889",
"data_age": 2,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 2,
"train_loss": 1.0831659038861592,
"val_loss": 1.1212460199991863,
"train_acc": 0.390625,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:03.134833",
"data_age": 4,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 3,
"train_loss": 1.0740693012873332,
"val_loss": 1.0992945830027263,
"train_acc": 0.4739583333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:04.425272",
"data_age": 5,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 4,
"train_loss": 1.0747728943824768,
"val_loss": 1.0821794271469116,
"train_acc": 0.4609375,
"val_acc": 0.3229166666666667,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:05.716421",
"data_age": 6,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 5,
"train_loss": 1.0489931503931682,
"val_loss": 1.0669521888097127,
"train_acc": 0.5833333333333334,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:07.007935",
"data_age": 8,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 6,
"train_loss": 1.0533669590950012,
"val_loss": 1.0505590836207073,
"train_acc": 0.5104166666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:08.296061",
"data_age": 9,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 7,
"train_loss": 1.0456886688868205,
"val_loss": 1.0351698795954387,
"train_acc": 0.5651041666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:09.607584",
"data_age": 10,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 8,
"train_loss": 1.040040671825409,
"val_loss": 1.0227736632029216,
"train_acc": 0.6119791666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:10.940892",
"data_age": 11,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
}
],
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"total_wins": {
"train": 0,
"val": 0
}
}

View File

@ -0,0 +1,192 @@
{
"epochs_completed": 7,
"best_val_pnl": 0.002028853100759435,
"best_epoch": 6,
"best_win_rate": 0.5157894736842106,
"training_started": "2025-03-31T02:50:10.418670",
"last_update": "2025-03-31T02:50:15.227593",
"epochs": [
{
"epoch": 1,
"train_loss": 1.1206786036491394,
"val_loss": 1.0542699098587036,
"train_acc": 0.11197916666666667,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:12.881423",
"data_age": 2
},
{
"epoch": 2,
"train_loss": 1.1266120672225952,
"val_loss": 1.072133183479309,
"train_acc": 0.1171875,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.186840",
"data_age": 2
},
{
"epoch": 3,
"train_loss": 1.1415620843569438,
"val_loss": 1.1701548099517822,
"train_acc": 0.1015625,
"val_acc": 0.5208333333333334,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.442018",
"data_age": 3
},
{
"epoch": 4,
"train_loss": 1.1331567962964375,
"val_loss": 1.070081114768982,
"train_acc": 0.09375,
"val_acc": 0.22916666666666666,
"train_pnl": 0.010650217327384765,
"val_pnl": -0.0007049481907895126,
"train_win_rate": 0.49279538904899134,
"val_win_rate": 0.40625,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.9036458333333334,
"HOLD": 0.09635416666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.3333333333333333,
"HOLD": 0.6666666666666666
}
},
"timestamp": "2025-03-31T02:50:13.739899",
"data_age": 3
},
{
"epoch": 5,
"train_loss": 1.10965762535731,
"val_loss": 1.0485950708389282,
"train_acc": 0.12239583333333333,
"val_acc": 0.17708333333333334,
"train_pnl": 0.011924086862580204,
"val_pnl": 0.0,
"train_win_rate": 0.5070422535211268,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7395833333333334,
"HOLD": 0.2604166666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:14.073439",
"data_age": 3
},
{
"epoch": 6,
"train_loss": 1.1272419293721516,
"val_loss": 1.084235429763794,
"train_acc": 0.1015625,
"val_acc": 0.22916666666666666,
"train_pnl": 0.014825159601390072,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4908616187989556,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:14.658295",
"data_age": 4
},
{
"epoch": 7,
"train_loss": 1.1171108484268188,
"val_loss": 1.0741244554519653,
"train_acc": 0.1171875,
"val_acc": 0.22916666666666666,
"train_pnl": 0.0059474696523706605,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4838709677419355,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7291666666666666,
"HOLD": 0.2708333333333333
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:15.227593",
"data_age": 4
}
]
}

View File

@ -112,27 +112,33 @@ class SimpleCNN(nn.Module):
     def _build_network(self):
         """Build the neural network with current feature dimensions"""
         # Create a flexible architecture that adapts to input dimensions
+        # Increased complexity
         self.fc_layers = nn.Sequential(
-            nn.Linear(self.feature_dim, 256),
+            nn.Linear(self.feature_dim, 512),  # Increased size
             nn.ReLU(),
-            nn.Linear(256, 256),
-            nn.ReLU()
+            nn.Dropout(0.2),  # Added dropout
+            nn.Linear(512, 512),  # Increased size
+            nn.ReLU(),
+            nn.Dropout(0.2),  # Added dropout
+            nn.Linear(512, 512),  # Added layer
+            nn.ReLU(),
+            nn.Dropout(0.2)  # Added dropout
         )

         # Output heads (Dueling DQN architecture)
-        self.advantage_head = nn.Linear(256, self.n_actions)
-        self.value_head = nn.Linear(256, 1)
+        self.advantage_head = nn.Linear(512, self.n_actions)  # Updated input size
+        self.value_head = nn.Linear(512, 1)  # Updated input size

         # Extrema detection head
-        self.extrema_head = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
+        self.extrema_head = nn.Linear(512, 3)  # 0=bottom, 1=top, 2=neither, Updated input size

         # Price prediction heads for different timeframes
-        self.price_pred_immediate = nn.Linear(256, 3)  # Up, Down, Sideways for immediate term (1s, 1m)
-        self.price_pred_midterm = nn.Linear(256, 3)  # Up, Down, Sideways for mid-term (1h)
-        self.price_pred_longterm = nn.Linear(256, 3)  # Up, Down, Sideways for long-term (1d)
+        self.price_pred_immediate = nn.Linear(512, 3)  # Updated input size
+        self.price_pred_midterm = nn.Linear(512, 3)  # Updated input size
+        self.price_pred_longterm = nn.Linear(512, 3)  # Updated input size

         # Regression heads for exact price prediction
-        self.price_pred_value = nn.Linear(256, 4)  # Predicts % change for each timeframe (1s, 1m, 1h, 1d)
+        self.price_pred_value = nn.Linear(512, 4)  # Updated input size

     def _check_rebuild_network(self, features):
         """Check if network needs to be rebuilt for different feature dimensions"""
@ -146,58 +152,70 @@ class SimpleCNN(nn.Module):
         return False

     def forward(self, x):
-        """
-        Forward pass through the network
-        Returns action values, extrema predictions, and price movement predictions for multiple timeframes
-        """
-        # Handle different input shapes
-        if len(x.shape) == 2:  # [batch_size, features]
-            # Simple feature vector
-            batch_size, features = x.shape
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(features)
-        elif len(x.shape) == 3:  # [batch_size, timeframes/channels, features]
-            # Reshape to flatten timeframes/channels with features
-            batch_size, timeframes, features = x.shape
-            total_features = timeframes * features
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(total_features)
-            # Reshape tensor to [batch_size, total_features]
-            x = x.reshape(batch_size, total_features)
-        # Apply fully connected layers
-        fc_out = self.fc_layers(x)
-        # Dueling architecture
-        advantage = self.advantage_head(fc_out)
-        value = self.value_head(fc_out)
-        # Q-values = value + (advantage - mean(advantage))
-        action_values = value + advantage - advantage.mean(dim=1, keepdim=True)
-        # Extrema predictions
-        extrema_pred = self.extrema_head(fc_out)
-        # Price movement predictions for different timeframes
-        price_immediate = self.price_pred_immediate(fc_out)  # 1s, 1m
-        price_midterm = self.price_pred_midterm(fc_out)  # 1h
-        price_longterm = self.price_pred_longterm(fc_out)  # 1d
-        # Regression values for exact price predictions (percentage changes)
-        price_values = self.price_pred_value(fc_out)
-        # Return all predictions in a structured dictionary
+        """Forward pass through the network"""
+        # Flatten input if needed to ensure it matches the expected feature dimension
+        batch_size = x.size(0)
+        # Reshape input if needed
+        if len(x.shape) > 2:  # Handle multi-dimensional input
+            # For 3D input: [batch, seq_len, features] or [batch, channels, features]
+            x = x.reshape(batch_size, -1)  # Flatten to [batch, seq_len*features]
+        # Check if the feature dimension matches and rebuild if necessary
+        if x.size(1) != self.feature_dim:
+            self._check_rebuild_network(x.size(1))
+        # Apply fully connected layers with ReLU activation
+        x = self.fc_layers(x)
+        # Branch 1: Action values (Q-values)
+        action_values = self.advantage_head(x)
+        # Branch 2: Extrema detection (market top/bottom classification)
+        extrema_pred = self.extrema_head(x)
+        # Branch 3: Price movement prediction over different timeframes
+        # Split into three timeframes: immediate, midterm, longterm
+        price_immediate = self.price_pred_immediate(x)
+        price_midterm = self.price_pred_midterm(x)
+        price_longterm = self.price_pred_longterm(x)
+        # Branch 4: Value prediction (regression for expected price changes)
+        price_values = self.price_pred_value(x)
+        # Package price predictions
         price_predictions = {
-            'immediate': price_immediate,
-            'midterm': price_midterm,
-            'longterm': price_longterm,
-            'values': price_values
+            'immediate': price_immediate,  # Classification (up/down/sideways)
+            'midterm': price_midterm,      # Classification (up/down/sideways)
+            'longterm': price_longterm,    # Classification (up/down/sideways)
+            'values': price_values         # Regression (expected % change)
         }
-        return action_values, extrema_pred, price_predictions
+        # Return all outputs and the hidden feature representation
+        return action_values, extrema_pred, price_predictions, x
+
+    def extract_features(self, x):
+        """Extract hidden features from the input and return both action values and features"""
+        # Flatten input if needed to ensure it matches the expected feature dimension
+        batch_size = x.size(0)
+        # Reshape input if needed
+        if len(x.shape) > 2:  # Handle multi-dimensional input
+            # For 3D input: [batch, seq_len, features] or [batch, channels, features]
+            x = x.reshape(batch_size, -1)  # Flatten to [batch, seq_len*features]
+        # Check if the feature dimension matches and rebuild if necessary
+        if x.size(1) != self.feature_dim:
+            self._check_rebuild_network(x.size(1))
+        # Apply fully connected layers with ReLU activation
+        x_features = self.fc_layers(x)
+        # Branch 1: Action values (Q-values)
+        action_values = self.advantage_head(x_features)
+        # Return action values and the hidden feature representation
+        return action_values, x_features

     def save(self, path):
         """Save model weights and architecture"""
@ -241,8 +259,10 @@ class CNNModelPyTorch(nn.Module):
         self.output_size = output_size
         self.timeframes = timeframes

-        # Calculate total input features across all timeframes
-        self.total_features = num_features * len(timeframes)
+        # num_features should already be the total features across all timeframes
+        self.total_features = num_features
+        logger.info(f"CNNModelPyTorch initialized with window_size={window_size}, num_features={num_features}, "
+                    f"total_features={self.total_features}, output_size={output_size}, timeframes={timeframes}")

         # Device configuration
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@ -317,6 +337,10 @@ class CNNModelPyTorch(nn.Module):
         # Ensure input is on the correct device
         x = x.to(self.device)

+        # Log input tensor shape for debugging
+        input_shape = x.size()
+        logger.debug(f"Input tensor shape: {input_shape}")

         # Check input dimensions and reshape as needed
         if len(x.size()) == 2:
             # If input is [batch_size, features], reshape to [batch_size, features, 1]
@ -324,8 +348,17 @@ class CNNModelPyTorch(nn.Module):
             # Check and handle if input features don't match model expectations
             if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=1)
+                else:
+                    x = x[:, :self.total_features]

             # For 1D input, use a sequence length of 1
             seq_len = 1
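
The 2D branch above (and the 3D branch in the next hunk) applies the same zero-pad-or-truncate adaptation instead of rebuilding the convolutional layers; a standalone sketch of the idea (function name hypothetical, assuming PyTorch):

import torch

def adapt_features(x: torch.Tensor, expected: int) -> torch.Tensor:
    """Zero-pad or truncate the last dimension so it has `expected` features."""
    actual = x.size(-1)
    if actual < expected:
        pad = torch.zeros(*x.shape[:-1], expected - actual, device=x.device)
        return torch.cat([x, pad], dim=-1)
    return x[..., :expected]

# adapt_features(torch.randn(8, 30, 45), 50).shape -> torch.Size([8, 30, 50])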
@ -336,14 +369,26 @@ class CNNModelPyTorch(nn.Module):
             # Check and handle if input dimensions don't match model expectations
             if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, seq_len, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=2)
+                else:
+                    x = x[:, :, :self.total_features]

             # Reshape input: [batch, window_size, features] -> [batch, features, window_size]
             x = x.permute(0, 2, 1)
         else:
             raise ValueError(f"Unexpected input shape: {x.size()}, expected 2D or 3D tensor")

+        # Log reshaped tensor for debugging
+        logger.debug(f"Reshaped tensor for convolution: {x.size()}")

         # Convolutional layers with dropout - safely handle small spatial dimensions
         try:
             x = self.dropout1(F.relu(self.norm1(self.conv1(x))))