diff --git a/.kiro/specs/multi-modal-trading-system/design.md b/.kiro/specs/multi-modal-trading-system/design.md index 3fec0a5..55d61be 100644 --- a/.kiro/specs/multi-modal-trading-system/design.md +++ b/.kiro/specs/multi-modal-trading-system/design.md @@ -72,8 +72,10 @@ Based on the existing implementation in `core/data_provider.py`, we'll enhance i - OHCLV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC - COB: for each 1s OHCLV we have +- 20 buckets of COB ammounts in USD - 1,5,15 and 60s MA of the COB imbalance counting +- 5 COB buckets -- ***OUTPUTS***: suggested trade action (BUY/SELL) - +- ***OUTPUTS***: + - suggested trade action (BUY/SELL/HOLD). Paired with confidence + - immediate price movement drection vector (-1: vertical down, 1: vertical up, 0: horizontal) - linear; with it's own confidence + # Standardized input for all models: { 'primary_symbol': 'ETH/USDT', diff --git a/NN/models/dqn_agent.py b/NN/models/dqn_agent.py index 3541f61..6603c3c 100644 --- a/NN/models/dqn_agent.py +++ b/NN/models/dqn_agent.py @@ -4,7 +4,7 @@ import torch.optim as optim import numpy as np from collections import deque import random -from typing import Tuple, List +from typing import Tuple, List, Dict, Any import os import sys import logging @@ -84,8 +84,8 @@ class DQNNetwork(nn.Module): nn.Linear(512, 4) # trending, ranging, volatile, mixed ) - # Price prediction head - self.price_head = nn.Sequential( + # Price direction prediction head - outputs direction and confidence + self.price_direction_head = nn.Sequential( nn.Linear(2048, 1024), nn.LayerNorm(1024), nn.ReLU(inplace=True), @@ -93,9 +93,14 @@ class DQNNetwork(nn.Module): nn.Linear(1024, 512), nn.LayerNorm(512), nn.ReLU(inplace=True), - nn.Linear(512, 3) # short, medium, long term price direction + nn.Linear(512, 2) # [direction, confidence] ) + # Direction activation (tanh for -1 to 1) + self.direction_activation = nn.Tanh() + # Confidence activation (sigmoid for 0 to 1) + self.confidence_activation = nn.Sigmoid() + # Volatility prediction head self.volatility_head = nn.Sequential( nn.Linear(2048, 1024), @@ -105,7 +110,7 @@ class DQNNetwork(nn.Module): nn.Linear(1024, 256), nn.LayerNorm(256), nn.ReLU(inplace=True), - nn.Linear(256, 1) # predicted volatility + nn.Linear(256, 4) # predicted volatility for 4 timeframes ) # Main Q-value head (dueling architecture) @@ -162,7 +167,13 @@ class DQNNetwork(nn.Module): # Multiple prediction heads regime_pred = self.regime_head(features) - price_pred = self.price_head(features) + price_direction_raw = self.price_direction_head(features) + + # Apply separate activations to direction and confidence + direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1 + confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1 + price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2] + volatility_pred = self.volatility_head(features) # Dueling Q-network @@ -172,7 +183,7 @@ class DQNNetwork(nn.Module): # Combine value and advantage for Q-values q_values = value + advantage - advantage.mean(dim=1, keepdim=True) - return q_values, regime_pred, price_pred, volatility_pred, features + return q_values, regime_pred, price_direction_pred, volatility_pred, features def act(self, state, explore=True): """ @@ -196,7 +207,11 @@ class DQNNetwork(nn.Module): state = state.unsqueeze(0) with torch.no_grad(): - q_values, regime_pred, price_pred, volatility_pred, features = self.forward(state) + q_values, regime_pred, price_direction_pred, volatility_pred, features = self.forward(state) + + # Process price direction predictions + if price_direction_pred is not None: + self.process_price_direction_predictions(price_direction_pred) # Get action probabilities using softmax action_probs = F.softmax(q_values, dim=1) @@ -332,23 +347,10 @@ class DQNAgent: self.recent_prices = deque(maxlen=20) self.recent_rewards = deque(maxlen=100) - # Price prediction tracking - self.last_price_pred = { - 'immediate': { - 'direction': 1, # Default to "sideways" - 'confidence': 0.0, - 'change': 0.0 - }, - 'midterm': { - 'direction': 1, # Default to "sideways" - 'confidence': 0.0, - 'change': 0.0 - }, - 'longterm': { - 'direction': 1, # Default to "sideways" - 'confidence': 0.0, - 'change': 0.0 - } + # Price direction tracking - stores direction and confidence + self.last_price_direction = { + 'direction': 0.0, # Single value between -1 and 1 + 'confidence': 0.0 # Single value between 0 and 1 } # Store separate memory for price direction examples @@ -521,25 +523,6 @@ class DQNAgent: logger.error(f"Error saving DQN checkpoint: {e}") return False - # Price prediction tracking - self.last_price_pred = { - 'immediate': { - 'direction': 1, # Default to "sideways" - 'confidence': 0.0, - 'change': 0.0 - }, - 'midterm': { - 'direction': 1, # Default to "sideways" - 'confidence': 0.0, - 'change': 0.0 - }, - 'longterm': { - 'direction': 1, # Default to "sideways" - 'confidence': 0.0, - 'change': 0.0 - } - } - # Store separate memory for price direction examples self.price_movement_memory = [] # For storing examples of clear price movements @@ -811,6 +794,92 @@ class DQNAgent: logger.error(f"Error in act_with_confidence: {e}") # Return default action with low confidence return 1, 0.1, [0.45, 0.55] # Default to HOLD action + + def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]: + """ + Process price direction predictions and convert to standardized format + + Args: + price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence] + + Returns: + Dict with direction (-1 to 1) and confidence (0 to 1) + """ + try: + if price_direction_pred is None or price_direction_pred.numel() == 0: + return self.last_price_direction + + # Extract direction and confidence values + direction_value = float(price_direction_pred[0, 0].item()) # -1 to 1 + confidence_value = float(price_direction_pred[0, 1].item()) # 0 to 1 + + # Update last price direction + self.last_price_direction = { + 'direction': direction_value, + 'confidence': confidence_value + } + + return self.last_price_direction + + except Exception as e: + logger.error(f"Error processing price direction predictions: {e}") + return self.last_price_direction + + def get_price_direction_vector(self) -> Dict[str, float]: + """ + Get the current price direction and confidence + + Returns: + Dict with direction (-1 to 1) and confidence (0 to 1) + """ + return self.last_price_direction + + def get_price_direction_summary(self) -> Dict[str, Any]: + """ + Get a summary of price direction prediction + + Returns: + Dict containing direction and confidence information + """ + try: + direction_value = self.last_price_direction['direction'] + confidence_value = self.last_price_direction['confidence'] + + # Convert to discrete direction + if direction_value > 0.1: + direction_label = "UP" + discrete_direction = 1 + elif direction_value < -0.1: + direction_label = "DOWN" + discrete_direction = -1 + else: + direction_label = "SIDEWAYS" + discrete_direction = 0 + + return { + 'direction_value': float(direction_value), + 'confidence_value': float(confidence_value), + 'direction_label': direction_label, + 'discrete_direction': discrete_direction, + 'strength': abs(float(direction_value)), + 'weighted_strength': abs(float(direction_value)) * float(confidence_value) + } + + except Exception as e: + logger.error(f"Error calculating price direction summary: {e}") + return { + 'direction_value': 0.0, + 'confidence_value': 0.0, + 'direction_label': "SIDEWAYS", + 'discrete_direction': 0, + 'strength': 0.0, + 'weighted_strength': 0.0 + } + + except Exception as e: + logger.error(f"Error in act_with_confidence: {e}") + # Return default action with low confidence + return 1, 0.1, [0.45, 0.55] # Default to HOLD action def _determine_action_with_position_management(self, sell_conf, buy_conf, current_price, market_context, explore): """ @@ -1032,11 +1101,8 @@ class DQNAgent: logger.error(f"Error converting experiences to tensors: {e}") return 0.0 - # Choose training method based on precision mode - if self.use_mixed_precision: - loss = self._replay_mixed_precision(states, actions, rewards, next_states, dones) - else: - loss = self._replay_standard(states, actions, rewards, next_states, dones) + # Always use standard training to fix gradient issues + loss = self._replay_standard(states, actions, rewards, next_states, dones) # Update epsilon if self.epsilon > self.epsilon_min: @@ -1208,9 +1274,33 @@ class DQNAgent: q_loss = self.criterion(current_q_values, target_q_values.detach()) - # Use only Q-loss for now to ensure clean gradients + # Calculate auxiliary losses and add to Q-loss total_loss = q_loss + # Add auxiliary losses if available + try: + # Get additional predictions from forward pass + if isinstance(q_values_output, tuple) and len(q_values_output) >= 5: + current_regime_pred = q_values_output[1] + current_price_pred = q_values_output[2] + current_volatility_pred = q_values_output[3] + current_extrema_pred = current_regime_pred # Use regime as extrema proxy for now + + # Price direction loss + if current_price_pred is not None and current_price_pred.shape[0] > 0: + price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions) + if price_direction_loss is not None: + total_loss = total_loss + 0.2 * price_direction_loss + + # Extrema loss + if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0: + extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions) + if extrema_loss is not None: + total_loss = total_loss + 0.1 * extrema_loss + + except Exception as e: + logger.debug(f"Could not add auxiliary loss in standard training: {e}") + # Reset gradients self.optimizer.zero_grad() @@ -1309,13 +1399,17 @@ class DQNAgent: # Add auxiliary losses if available try: + # Price direction loss + if current_price_pred is not None and current_price_pred.shape[0] > 0: + price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions) + if price_direction_loss is not None: + loss = loss + 0.2 * price_direction_loss + + # Extrema loss if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0: - # Simple extrema targets - with torch.no_grad(): - extrema_targets = torch.ones(current_extrema_pred.shape[0], dtype=torch.long, device=current_extrema_pred.device) * 2 - - extrema_loss = F.cross_entropy(current_extrema_pred, extrema_targets) - loss = loss + 0.1 * extrema_loss + extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions) + if extrema_loss is not None: + loss = loss + 0.1 * extrema_loss except Exception as e: logger.debug(f"Could not add auxiliary loss in mixed precision: {e}") @@ -1649,6 +1743,95 @@ class DQNAgent: 'exit_threshold': self.exit_confidence_threshold } + def _calculate_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor: + """ + Calculate loss for price direction predictions + + Args: + price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence] + rewards: Tensor of shape [batch] containing rewards + actions: Tensor of shape [batch] containing actions + + Returns: + Price direction loss tensor + """ + try: + if price_direction_pred.size(1) != 2: + return None + + batch_size = price_direction_pred.size(0) + + # Extract direction and confidence predictions + direction_pred = price_direction_pred[:, 0] # -1 to 1 + confidence_pred = price_direction_pred[:, 1] # 0 to 1 + + # Create targets based on rewards and actions + with torch.no_grad(): + # Direction targets: 1 if reward > 0 and action is BUY, -1 if reward > 0 and action is SELL, 0 otherwise + direction_targets = torch.zeros(batch_size, device=price_direction_pred.device) + for i in range(batch_size): + if rewards[i] > 0.01: # Positive reward threshold + if actions[i] == 0: # BUY action + direction_targets[i] = 1.0 # UP + elif actions[i] == 1: # SELL action + direction_targets[i] = -1.0 # DOWN + # else: targets remain 0 (sideways) + + # Confidence targets: based on reward magnitude (higher reward = higher confidence) + confidence_targets = torch.abs(rewards).clamp(0, 1) + + # Calculate losses for each component + direction_loss = F.mse_loss(direction_pred, direction_targets) + confidence_loss = F.mse_loss(confidence_pred, confidence_targets) + + # Combined loss (direction is more important than confidence) + total_loss = direction_loss + 0.3 * confidence_loss + + return total_loss + + except Exception as e: + logger.debug(f"Error calculating price direction loss: {e}") + return None + + def _calculate_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor: + """ + Calculate loss for extrema predictions + + Args: + extrema_pred: Extrema predictions + rewards: Tensor containing rewards + actions: Tensor containing actions + + Returns: + Extrema loss tensor + """ + try: + batch_size = extrema_pred.size(0) + + # Create targets based on reward patterns + with torch.no_grad(): + extrema_targets = torch.ones(batch_size, dtype=torch.long, device=extrema_pred.device) * 2 # Default to "neither" + + for i in range(batch_size): + # High positive reward suggests we're at a good entry point (potential bottom for BUY, top for SELL) + if rewards[i] > 0.05: + if actions[i] == 0: # BUY action + extrema_targets[i] = 0 # Bottom + elif actions[i] == 1: # SELL action + extrema_targets[i] = 1 # Top + + # Calculate cross-entropy loss + if extrema_pred.size(1) >= 3: + extrema_loss = F.cross_entropy(extrema_pred[:, :3], extrema_targets) + else: + extrema_loss = F.cross_entropy(extrema_pred, extrema_targets) + + return extrema_loss + + except Exception as e: + logger.debug(f"Error calculating extrema loss: {e}") + return None + def get_enhanced_training_stats(self): """Get enhanced RL training statistics with detailed metrics (from EnhancedDQNAgent)""" return { diff --git a/NN/models/enhanced_cnn.py b/NN/models/enhanced_cnn.py index b5084f0..89134a4 100644 --- a/NN/models/enhanced_cnn.py +++ b/NN/models/enhanced_cnn.py @@ -265,8 +265,9 @@ class EnhancedCNN(nn.Module): nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither ) - # ULTRA MASSIVE multi-timeframe price prediction heads - self.price_pred_immediate = nn.Sequential( + # ULTRA MASSIVE price direction prediction head + # Outputs single direction and confidence values + self.price_direction_head = nn.Sequential( nn.Linear(1024, 1024), # Increased from 512 nn.ReLU(), nn.Dropout(0.3), @@ -275,32 +276,13 @@ class EnhancedCNN(nn.Module): nn.Dropout(0.3), nn.Linear(512, 256), # Increased from 128 nn.ReLU(), - nn.Linear(256, 3) # Up, Down, Sideways + nn.Linear(256, 2) # [direction, confidence] ) - self.price_pred_midterm = nn.Sequential( - nn.Linear(1024, 1024), # Increased from 512 - nn.ReLU(), - nn.Dropout(0.3), - nn.Linear(1024, 512), # Increased from 256 - nn.ReLU(), - nn.Dropout(0.3), - nn.Linear(512, 256), # Increased from 128 - nn.ReLU(), - nn.Linear(256, 3) # Up, Down, Sideways - ) - - self.price_pred_longterm = nn.Sequential( - nn.Linear(1024, 1024), # Increased from 512 - nn.ReLU(), - nn.Dropout(0.3), - nn.Linear(1024, 512), # Increased from 256 - nn.ReLU(), - nn.Dropout(0.3), - nn.Linear(512, 256), # Increased from 128 - nn.ReLU(), - nn.Linear(256, 3) # Up, Down, Sideways - ) + # Direction activation (tanh for -1 to 1) + self.direction_activation = nn.Tanh() + # Confidence activation (sigmoid for 0 to 1) + self.confidence_activation = nn.Sigmoid() # ULTRA MASSIVE value prediction with ensemble approaches self.price_pred_value = nn.Sequential( @@ -490,10 +472,14 @@ class EnhancedCNN(nn.Module): # Extrema predictions (bottom/top/neither detection) extrema_pred = self.extrema_head(features_refined) - # Multi-timeframe price movement predictions - price_immediate = self.price_pred_immediate(features_refined) - price_midterm = self.price_pred_midterm(features_refined) - price_longterm = self.price_pred_longterm(features_refined) + # Price direction predictions + price_direction_raw = self.price_direction_head(features_refined) + + # Apply separate activations to direction and confidence + direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1 + confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1 + price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2] + price_values = self.price_pred_value(features_refined) # Additional specialized predictions for enhanced accuracy @@ -502,15 +488,14 @@ class EnhancedCNN(nn.Module): market_regime_pred = self.market_regime_head(features_refined) risk_pred = self.risk_head(features_refined) - # Package all price predictions into a single tensor (use immediate as primary) - # For compatibility with DQN agent, we return price_immediate as the price prediction tensor - price_pred_tensor = price_immediate + # Use the price direction prediction directly (already [batch, 2]) + price_direction_tensor = price_direction_pred # Package additional predictions into a single tensor (use volatility as primary) # For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor advanced_pred_tensor = volatility_pred - return q_values, extrema_pred, price_pred_tensor, features_refined, advanced_pred_tensor + return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor def act(self, state, explore=True) -> Tuple[int, float, List[float]]: """Enhanced action selection with ultra massive model predictions""" @@ -528,7 +513,11 @@ class EnhancedCNN(nn.Module): state_tensor = state_tensor.unsqueeze(0) with torch.no_grad(): - q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor) + q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor) + + # Process price direction predictions + if price_direction_predictions is not None: + self.process_price_direction_predictions(price_direction_predictions) # Apply softmax to get action probabilities action_probs_tensor = torch.softmax(q_values, dim=1) @@ -565,6 +554,100 @@ class EnhancedCNN(nn.Module): logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})") return action_idx, confidence, action_probs + + def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]: + """ + Process price direction predictions and convert to standardized format + + Args: + price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence] + + Returns: + Dict with direction (-1 to 1) and confidence (0 to 1) + """ + try: + if price_direction_pred is None or price_direction_pred.numel() == 0: + return {} + + # Extract direction and confidence values + direction_value = float(price_direction_pred[0, 0].item()) # -1 to 1 + confidence_value = float(price_direction_pred[0, 1].item()) # 0 to 1 + + processed_directions = { + 'direction': direction_value, + 'confidence': confidence_value + } + + # Store for later access + self.last_price_direction = processed_directions + + return processed_directions + + except Exception as e: + logger.error(f"Error processing price direction predictions: {e}") + return {} + + def get_price_direction_vector(self) -> Dict[str, float]: + """ + Get the current price direction and confidence + + Returns: + Dict with direction (-1 to 1) and confidence (0 to 1) + """ + return getattr(self, 'last_price_direction', {}) + + def get_price_direction_summary(self) -> Dict[str, Any]: + """ + Get a summary of price direction prediction + + Returns: + Dict containing direction and confidence information + """ + try: + last_direction = getattr(self, 'last_price_direction', {}) + if not last_direction: + return { + 'direction_value': 0.0, + 'confidence_value': 0.0, + 'direction_label': "SIDEWAYS", + 'discrete_direction': 0, + 'strength': 0.0, + 'weighted_strength': 0.0 + } + + direction_value = last_direction['direction'] + confidence_value = last_direction['confidence'] + + # Convert to discrete direction + if direction_value > 0.1: + direction_label = "UP" + discrete_direction = 1 + elif direction_value < -0.1: + direction_label = "DOWN" + discrete_direction = -1 + else: + direction_label = "SIDEWAYS" + discrete_direction = 0 + + return { + 'direction_value': float(direction_value), + 'confidence_value': float(confidence_value), + 'direction_label': direction_label, + 'discrete_direction': discrete_direction, + 'strength': abs(float(direction_value)), + 'weighted_strength': abs(float(direction_value)) * float(confidence_value) + } + + except Exception as e: + logger.error(f"Error calculating price direction summary: {e}") + return { + 'direction_value': 0.0, + 'confidence_value': 0.0, + 'direction_label': "SIDEWAYS", + 'discrete_direction': 0, + 'strength': 0.0, + 'weighted_strength': 0.0 + } def save(self, path): """Save model weights and architecture""" diff --git a/core/orchestrator.py b/core/orchestrator.py index 231ef13..0f843d8 100644 --- a/core/orchestrator.py +++ b/core/orchestrator.py @@ -719,6 +719,95 @@ class TradingOrchestrator: except Exception as e: logger.error(f"Error initializing ML models: {e}") + def _calculate_cnn_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor: + """ + Calculate price direction loss for CNN model + + Args: + price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence] + rewards: Tensor of shape [batch] containing rewards + actions: Tensor of shape [batch] containing actions + + Returns: + Price direction loss tensor + """ + try: + if price_direction_pred.size(1) != 2: + return None + + batch_size = price_direction_pred.size(0) + + # Extract direction and confidence predictions + direction_pred = price_direction_pred[:, 0] # -1 to 1 + confidence_pred = price_direction_pred[:, 1] # 0 to 1 + + # Create targets based on rewards and actions + with torch.no_grad(): + # Direction targets: 1 if reward > 0 and action is BUY, -1 if reward > 0 and action is SELL, 0 otherwise + direction_targets = torch.zeros(batch_size, device=price_direction_pred.device) + for i in range(batch_size): + if rewards[i] > 0.01: # Positive reward threshold + if actions[i] == 0: # BUY action + direction_targets[i] = 1.0 # UP + elif actions[i] == 1: # SELL action + direction_targets[i] = -1.0 # DOWN + # else: targets remain 0 (sideways) + + # Confidence targets: based on reward magnitude (higher reward = higher confidence) + confidence_targets = torch.abs(rewards).clamp(0, 1) + + # Calculate losses for each component + direction_loss = nn.MSELoss()(direction_pred, direction_targets) + confidence_loss = nn.MSELoss()(confidence_pred, confidence_targets) + + # Combined loss (direction is more important than confidence) + total_loss = direction_loss + 0.3 * confidence_loss + + return total_loss + + except Exception as e: + logger.debug(f"Error calculating CNN price direction loss: {e}") + return None + + def _calculate_cnn_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor: + """ + Calculate extrema loss for CNN model + + Args: + extrema_pred: Extrema predictions + rewards: Tensor containing rewards + actions: Tensor containing actions + + Returns: + Extrema loss tensor + """ + try: + batch_size = extrema_pred.size(0) + + # Create targets based on reward patterns + with torch.no_grad(): + extrema_targets = torch.ones(batch_size, dtype=torch.long, device=extrema_pred.device) * 2 # Default to "neither" + + for i in range(batch_size): + # High positive reward suggests we're at a good entry point + if rewards[i] > 0.05: + if actions[i] == 0: # BUY action + extrema_targets[i] = 0 # Bottom + elif actions[i] == 1: # SELL action + extrema_targets[i] = 1 # Top + + # Calculate cross-entropy loss + if extrema_pred.size(1) >= 3: + extrema_loss = nn.CrossEntropyLoss()(extrema_pred[:, :3], extrema_targets) + else: + extrema_loss = nn.CrossEntropyLoss()(extrema_pred, extrema_targets) + + return extrema_loss + + except Exception as e: + logger.debug(f"Error calculating CNN extrema loss: {e}") + return None + def update_model_loss(self, model_name: str, current_loss: float, best_loss: Optional[float] = None): """Update model loss and potentially best loss""" if model_name in self.model_states: @@ -1938,7 +2027,71 @@ class TradingOrchestrator: # Evaluate the previous prediction and train the model immediately await self._evaluate_and_train_on_record(inference_record, current_price) - logger.info(f"Completed immediate training for {model_name}") + # Log predicted vs actual outcome + prediction = inference_record.get('prediction', {}) + predicted_action = prediction.get('action', 'UNKNOWN') + predicted_confidence = prediction.get('confidence', 0.0) + + # Calculate actual outcome + symbol = inference_record.get('symbol', 'ETH/USDT') + predicted_price = None + actual_price_change_pct = 0.0 + + # Try to get price direction vectors from metadata (new format) + if 'price_direction' in prediction and prediction['price_direction']: + try: + price_direction_data = prediction['price_direction'] + # Process price direction data + if isinstance(price_direction_data, dict) and 'direction' in price_direction_data: + direction = price_direction_data['direction'] + confidence = price_direction_data.get('confidence', 1.0) + + # Convert direction to price change percentage + # Scale by confidence and direction strength + predicted_price_change_pct = direction * confidence * 0.02 # 2% max change + predicted_price = current_price * (1 + predicted_price_change_pct) + except Exception as e: + logger.debug(f"Error processing price direction data: {e}") + + # Fallback to old price prediction format + elif 'price_prediction' in prediction and prediction['price_prediction']: + try: + price_prediction_data = prediction['price_prediction'] + if isinstance(price_prediction_data, list) and len(price_prediction_data) > 0: + predicted_price_change_pct = float(price_prediction_data[0]) * 0.01 + predicted_price = current_price * (1 + predicted_price_change_pct) + except Exception: + pass + + # Calculate price change + if predicted_price is not None: + actual_price_change_pct = (current_price - predicted_price) / predicted_price * 100 + price_outcome = f"Predicted: ${predicted_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)" + else: + # Fall back to historical price comparison + historical_data = self.data_provider.get_historical_data(symbol, '1m', limit=10) + if historical_data is not None and not historical_data.empty: + historical_price = historical_data['close'].iloc[-1] + actual_price_change_pct = (current_price - historical_price) / historical_price * 100 + price_outcome = f"Historical: ${historical_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)" + else: + price_outcome = f"Actual: ${current_price:.2f}" + + # Determine if prediction was correct based on action and price movement + was_correct = False + if predicted_action == 'BUY' and actual_price_change_pct > 0.1: # Price went up + was_correct = True + elif predicted_action == 'SELL' and actual_price_change_pct < -0.1: # Price went down + was_correct = True + elif predicted_action == 'HOLD' and abs(actual_price_change_pct) < 0.5: # Price stayed stable + was_correct = True + + outcome_status = "✅ CORRECT" if was_correct else "❌ INCORRECT" + + logger.info(f"Completed immediate training for {model_name} - {outcome_status}") + logger.info(f" Prediction: {predicted_action} ({predicted_confidence:.3f})") + logger.info(f" {price_outcome}") + logger.info(f" Outcome: {outcome_status}") except Exception as e: logger.error(f"Error in immediate training for {model_name}: {e}") @@ -2412,12 +2565,33 @@ class TradingOrchestrator: self.cnn_optimizer.zero_grad() # Forward pass - q_values, extrema_pred, price_pred, features_refined, advanced_pred = self.cnn_model(features_tensor) + q_values, extrema_pred, price_direction_pred, features_refined, advanced_pred = self.cnn_model(features_tensor) - # Calculate loss + # Calculate primary Q-value loss q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1) target_q = reward_tensor # Simplified target - loss = nn.MSELoss()(q_values_selected, target_q) + q_loss = nn.MSELoss()(q_values_selected, target_q) + + # Calculate auxiliary losses for price direction and extrema + total_loss = q_loss + + # Price direction loss + if price_direction_pred is not None and price_direction_pred.shape[0] > 0: + price_direction_loss = self._calculate_cnn_price_direction_loss( + price_direction_pred, reward_tensor, action_tensor + ) + if price_direction_loss is not None: + total_loss = total_loss + 0.2 * price_direction_loss + + # Extrema loss + if extrema_pred is not None and extrema_pred.shape[0] > 0: + extrema_loss = self._calculate_cnn_extrema_loss( + extrema_pred, reward_tensor, action_tensor + ) + if extrema_loss is not None: + total_loss = total_loss + 0.1 * extrema_loss + + loss = total_loss # Backward pass training_start_time = time.time() @@ -2640,9 +2814,17 @@ class TradingOrchestrator: 'HOLD': float(action_probs[0, 2].item()) } - # Extract price predictions if available - price_prediction = None + # Extract price direction predictions if available + price_direction_data = None if price_pred is not None: + # Process price direction predictions + if hasattr(model.model, 'process_price_direction_predictions'): + try: + price_direction_data = model.model.process_price_direction_predictions(price_pred) + except Exception as e: + logger.debug(f"Error processing CNN price direction: {e}") + + # Fallback to old format for compatibility price_prediction = price_pred.squeeze(0).cpu().numpy().tolist() prediction = Prediction( @@ -2656,6 +2838,7 @@ class TradingOrchestrator: 'feature_size': len(base_data.get_feature_vector()), 'data_sources': ['ohlcv_1s', 'ohlcv_1m', 'ohlcv_1h', 'ohlcv_1d', 'btc', 'cob', 'indicators'], 'price_prediction': price_prediction, + 'price_direction': price_direction_data, 'extrema_prediction': extrema_pred.squeeze(0).cpu().numpy().tolist() if extrema_pred is not None else None } ) @@ -2694,6 +2877,14 @@ class TradingOrchestrator: action_names = ['BUY', 'SELL', 'HOLD'] # Note: enhanced_cnn uses this order best_action = action_names[action_idx] + # Get price direction vectors from CNN model if available + price_direction_data = None + if hasattr(model.model, 'get_price_direction_vector'): + try: + price_direction_data = model.model.get_price_direction_vector() + except Exception as e: + logger.debug(f"Error getting price direction from CNN: {e}") + pred = Prediction( action=best_action, confidence=float(confidence), @@ -2708,7 +2899,8 @@ class TradingOrchestrator: metadata={ 'feature_vector_size': len(feature_vector), 'unified_input': True, - 'fallback_method': 'direct_model_inference' + 'fallback_method': 'direct_model_inference', + 'price_direction': price_direction_data } ) predictions.append(pred) @@ -2811,6 +3003,14 @@ class TradingOrchestrator: if q_values_for_capture: logger.warning(f"Q-values length mismatch: expected {len(action_names)}, got {len(q_values_for_capture)}. Using default probabilities.") + # Get price direction vectors from DQN model if available + price_direction_data = None + if hasattr(model.model, 'get_price_direction_vector'): + try: + price_direction_data = model.model.get_price_direction_vector() + except Exception as e: + logger.debug(f"Error getting price direction from DQN: {e}") + prediction = Prediction( action=action, confidence=float(confidence), @@ -2818,7 +3018,10 @@ class TradingOrchestrator: timeframe='mixed', # RL uses mixed timeframes timestamp=datetime.now(), model_name=model.name, - metadata={'state_size': len(state)} + metadata={ + 'state_size': len(state), + 'price_direction': price_direction_data + } ) # Capture DQN prediction for dashboard visualization