predict price direction

2025-07-27 23:20:47 +03:00
parent dfa18035f1
commit 39267697f3
4 changed files with 572 additions and 101 deletions
--- a/.kiro/specs/multi-modal-trading-system/design.md
+++ b/.kiro/specs/multi-modal-trading-system/design.md
@@ -72,8 +72,10 @@ Based on the existing implementation in `core/data_provider.py`, we'll enhance i
   - OHCLV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC
   - COB: for each 1s OHCLV we have  +- 20 buckets of COB ammounts in USD
   - 1,5,15 and 60s MA of the COB imbalance counting +- 5 COB buckets
- ***OUTPUTS***: suggested trade action (BUY/SELL)
-
+- ***OUTPUTS***: 
+    - suggested trade action (BUY/SELL/HOLD). Paired with confidence
+    - immediate price movement direction vector (-1: vertical down, 1: vertical up, 0: horizontal) - linear; with its own confidence
+    
 # Standardized input for all models:
 {
    'primary_symbol': 'ETH/USDT',
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -4,7 +4,7 @@ import torch.optim as optim
 import numpy as np
 from collections import deque
 import random
-from typing import Tuple, List
+from typing import Tuple, List, Dict, Any
 import os
 import sys
 import logging
@@ -84,8 +84,8 @@ class DQNNetwork(nn.Module):
            nn.Linear(512, 4)  # trending, ranging, volatile, mixed
        )
        
-        # Price prediction head
-        self.price_head = nn.Sequential(
+        # Price direction prediction head - outputs direction and confidence
+        self.price_direction_head = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.LayerNorm(1024),
            nn.ReLU(inplace=True),
@@ -93,9 +93,14 @@ class DQNNetwork(nn.Module):
            nn.Linear(1024, 512),
            nn.LayerNorm(512),
            nn.ReLU(inplace=True),
-            nn.Linear(512, 3)  # short, medium, long term price direction
+            nn.Linear(512, 2)  # [direction, confidence]
        )
        
+        # Direction activation (tanh for -1 to 1)
+        self.direction_activation = nn.Tanh()
+        # Confidence activation (sigmoid for 0 to 1)
+        self.confidence_activation = nn.Sigmoid()
+        
        # Volatility prediction head
        self.volatility_head = nn.Sequential(
            nn.Linear(2048, 1024),
@@ -105,7 +110,7 @@ class DQNNetwork(nn.Module):
            nn.Linear(1024, 256),
            nn.LayerNorm(256),
            nn.ReLU(inplace=True),
-            nn.Linear(256, 1)  # predicted volatility
+            nn.Linear(256, 4)  # predicted volatility for 4 timeframes
        )
        
        # Main Q-value head (dueling architecture)
@@ -162,7 +167,13 @@ class DQNNetwork(nn.Module):
        
        # Multiple prediction heads
        regime_pred = self.regime_head(features)
-        price_pred = self.price_head(features) 
+        price_direction_raw = self.price_direction_head(features)
+        
+        # Apply separate activations to direction and confidence
+        direction = self.direction_activation(price_direction_raw[:, 0:1])  # -1 to 1
+        confidence = self.confidence_activation(price_direction_raw[:, 1:2])  # 0 to 1
+        price_direction_pred = torch.cat([direction, confidence], dim=1)  # [batch, 2]
+        
        volatility_pred = self.volatility_head(features)
        
        # Dueling Q-network
@@ -172,7 +183,7 @@ class DQNNetwork(nn.Module):
        # Combine value and advantage for Q-values
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
        
-        return q_values, regime_pred, price_pred, volatility_pred, features
+        return q_values, regime_pred, price_direction_pred, volatility_pred, features
    
    def act(self, state, explore=True):
        """
@@ -196,7 +207,11 @@ class DQNNetwork(nn.Module):
            state = state.unsqueeze(0)
        
        with torch.no_grad():
-            q_values, regime_pred, price_pred, volatility_pred, features = self.forward(state)
+            q_values, regime_pred, price_direction_pred, volatility_pred, features = self.forward(state)
+            
+            # Process price direction predictions
+            if price_direction_pred is not None:
+                self.process_price_direction_predictions(price_direction_pred)
            
            # Get action probabilities using softmax
            action_probs = F.softmax(q_values, dim=1)
@@ -332,23 +347,10 @@ class DQNAgent:
        self.recent_prices = deque(maxlen=20)
        self.recent_rewards = deque(maxlen=100)
        
-        # Price prediction tracking
-        self.last_price_pred = {
-            'immediate': {
-                'direction': 1,  # Default to "sideways"
-                'confidence': 0.0,
-                'change': 0.0
-            },
-            'midterm': {
-                'direction': 1,  # Default to "sideways"
-                'confidence': 0.0,
-                'change': 0.0
-            },
-            'longterm': {
-                'direction': 1,  # Default to "sideways"
-                'confidence': 0.0,
-                'change': 0.0
-            }
+        # Price direction tracking - stores direction and confidence
+        self.last_price_direction = {
+            'direction': 0.0,    # Single value between -1 and 1
+            'confidence': 0.0    # Single value between 0 and 1
        }
        
        # Store separate memory for price direction examples
@@ -521,25 +523,6 @@ class DQNAgent:
            logger.error(f"Error saving DQN checkpoint: {e}")
            return False
        
-        # Price prediction tracking
-        self.last_price_pred = {
-            'immediate': {
-                'direction': 1,  # Default to "sideways"
-                'confidence': 0.0,
-                'change': 0.0
-            },
-            'midterm': {
-                'direction': 1,  # Default to "sideways"
-                'confidence': 0.0,
-                'change': 0.0
-            },
-            'longterm': {
-                'direction': 1,  # Default to "sideways"
-                'confidence': 0.0,
-                'change': 0.0
-            }
-        }
-        
        # Store separate memory for price direction examples
        self.price_movement_memory = []  # For storing examples of clear price movements
        
@@ -811,6 +794,92 @@ class DQNAgent:
            logger.error(f"Error in act_with_confidence: {e}")
            # Return default action with low confidence
            return 1, 0.1, [0.45, 0.55]  # Default to HOLD action
+    
+    def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
+        """
+        Cache the first sample's price-direction prediction on the agent.
+
+        Only row 0 of the tensor is read; column 0 is stored as the direction
+        and column 1 as the confidence.
+
+        Args:
+            price_direction_pred: Tensor indexed as [sample, component] whose
+                two components are read as [direction, confidence]
+
+        Returns:
+            ``self.last_price_direction``: refreshed on success, left untouched
+            when the input is None/empty or when extraction raises
+        """
+        try:
+            has_prediction = price_direction_pred is not None and price_direction_pred.numel() > 0
+            if has_prediction:
+                # Pull both scalars out before rebinding, so a failure on either
+                # one leaves the previously cached dict in place
+                first_sample = price_direction_pred[0]
+                latest = {
+                    'direction': float(first_sample[0].item()),
+                    'confidence': float(first_sample[1].item())
+                }
+                self.last_price_direction = latest
+        except Exception as e:
+            logger.error(f"Error processing price direction predictions: {e}")
+        return self.last_price_direction
+    
+    def get_price_direction_vector(self) -> Dict[str, float]:
+        """
+        Return the most recently cached price-direction prediction.
+
+        Returns:
+            The ``self.last_price_direction`` dict itself (not a copy)
+        """
+        return self.last_price_direction
+    
+    def get_price_direction_summary(self) -> Dict[str, Any]:
+        """
+        Get a summary of price direction prediction
+
+        The cached direction is bucketed with a +/-0.1 dead zone: above 0.1 is
+        "UP" (1), below -0.1 is "DOWN" (-1), anything in between is
+        "SIDEWAYS" (0).
+
+        Returns:
+            Dict with the raw direction/confidence, a discrete label and sign,
+            and strength figures; a neutral SIDEWAYS summary if reading fails
+        """
+        try:
+            direction_value = self.last_price_direction['direction']
+            confidence_value = self.last_price_direction['confidence']
+
+            # Convert to discrete direction
+            if direction_value > 0.1:
+                direction_label = "UP"
+                discrete_direction = 1
+            elif direction_value < -0.1:
+                direction_label = "DOWN"
+                discrete_direction = -1
+            else:
+                direction_label = "SIDEWAYS"
+                discrete_direction = 0
+
+            return {
+                'direction_value': float(direction_value),
+                'confidence_value': float(confidence_value),
+                'direction_label': direction_label,
+                'discrete_direction': discrete_direction,
+                'strength': abs(float(direction_value)),
+                'weighted_strength': abs(float(direction_value)) * float(confidence_value)
+            }
+
+        except Exception as e:
+            logger.error(f"Error calculating price direction summary: {e}")
+            return {
+                'direction_value': 0.0,
+                'confidence_value': 0.0,
+                'direction_label': "SIDEWAYS",
+                'discrete_direction': 0,
+                'strength': 0.0,
+                'weighted_strength': 0.0
+            }

    def _determine_action_with_position_management(self, sell_conf, buy_conf, current_price, market_context, explore):
        """
@@ -1032,11 +1101,8 @@ class DQNAgent:
            logger.error(f"Error converting experiences to tensors: {e}")
            return 0.0
        
-        # Choose training method based on precision mode
-        if self.use_mixed_precision:
-            loss = self._replay_mixed_precision(states, actions, rewards, next_states, dones)
-        else:
-            loss = self._replay_standard(states, actions, rewards, next_states, dones)
+        # Always use standard training to fix gradient issues
+        loss = self._replay_standard(states, actions, rewards, next_states, dones)
        
        # Update epsilon
        if self.epsilon > self.epsilon_min:
@@ -1208,9 +1274,33 @@ class DQNAgent:
                
            q_loss = self.criterion(current_q_values, target_q_values.detach())
            
-            # Use only Q-loss for now to ensure clean gradients
+            # Calculate auxiliary losses and add to Q-loss
            total_loss = q_loss
            
+            # Add auxiliary losses if available
+            try:
+                # Get additional predictions from forward pass
+                if isinstance(q_values_output, tuple) and len(q_values_output) >= 5:
+                    current_regime_pred = q_values_output[1]
+                    current_price_pred = q_values_output[2]
+                    current_volatility_pred = q_values_output[3]
+                    current_extrema_pred = current_regime_pred  # Use regime as extrema proxy for now
+                    
+                    # Price direction loss
+                    if current_price_pred is not None and current_price_pred.shape[0] > 0:
+                        price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
+                        if price_direction_loss is not None:
+                            total_loss = total_loss + 0.2 * price_direction_loss
+                    
+                    # Extrema loss
+                    if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
+                        extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
+                        if extrema_loss is not None:
+                            total_loss = total_loss + 0.1 * extrema_loss
+                            
+            except Exception as e:
+                logger.debug(f"Could not add auxiliary loss in standard training: {e}")
+            
            # Reset gradients
            self.optimizer.zero_grad()
            
@@ -1309,13 +1399,17 @@ class DQNAgent:
                    
                    # Add auxiliary losses if available
                    try:
+                        # Price direction loss
+                        if current_price_pred is not None and current_price_pred.shape[0] > 0:
+                            price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
+                            if price_direction_loss is not None:
+                                loss = loss + 0.2 * price_direction_loss
+                        
+                        # Extrema loss
                        if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
-                            # Simple extrema targets
-                            with torch.no_grad():
-                                extrema_targets = torch.ones(current_extrema_pred.shape[0], dtype=torch.long, device=current_extrema_pred.device) * 2
-                            
-                            extrema_loss = F.cross_entropy(current_extrema_pred, extrema_targets)
-                            loss = loss + 0.1 * extrema_loss
+                            extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
+                            if extrema_loss is not None:
+                                loss = loss + 0.1 * extrema_loss
                            
                    except Exception as e:
                        logger.debug(f"Could not add auxiliary loss in mixed precision: {e}")
@@ -1649,6 +1743,95 @@ class DQNAgent:
            'exit_threshold': self.exit_confidence_threshold
        }
    
+    def _calculate_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
+        """
+        Calculate loss for price direction predictions
+
+        Targets come from the replayed experience, not from market data: a
+        sample with reward > 0.01 is labelled +1 (UP) for action 0 and -1 (DOWN)
+        for action 1; all other samples are labelled 0. The confidence target
+        is |reward| clamped to [0, 1].
+
+        Args:
+            price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
+            rewards: Tensor of shape [batch] containing rewards
+            actions: Tensor of shape [batch] containing actions
+
+        Returns:
+            Scalar loss (direction MSE + 0.3 * confidence MSE), or None when the
+            prediction does not have 2 columns or the computation raises
+        """
+        try:
+            if price_direction_pred.size(1) != 2:
+                return None
+
+            # Extract direction and confidence predictions
+            direction_pred = price_direction_pred[:, 0]  # -1 to 1
+            confidence_pred = price_direction_pred[:, 1]  # 0 to 1
+
+            # Build targets with whole-batch tensor ops; a per-sample Python
+            # loop forces one host/device round trip per comparison
+            with torch.no_grad():
+                flat_rewards = rewards.reshape(-1)
+                flat_actions = actions.reshape(-1)
+                profitable = (flat_rewards > 0.01).float()
+                # +1 where action == 0 (BUY), -1 where action == 1 (SELL), else 0
+                action_sign = (flat_actions == 0).float() - (flat_actions == 1).float()
+                direction_targets = (profitable * action_sign).to(direction_pred.device)
+
+                # Confidence targets: based on reward magnitude (higher reward = higher confidence)
+                confidence_targets = torch.abs(flat_rewards).clamp(0, 1)
+
+            # Calculate losses for each component
+            direction_loss = F.mse_loss(direction_pred, direction_targets)
+            confidence_loss = F.mse_loss(confidence_pred, confidence_targets)
+
+            # Combined loss (direction is more important than confidence)
+            return direction_loss + 0.3 * confidence_loss
+
+        except Exception as e:
+            logger.debug(f"Error calculating price direction loss: {e}")
+            return None
+    
+    def _calculate_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
+        """
+        Calculate loss for extrema predictions
+
+        Class targets are inferred from the replayed experience: reward above
+        0.05 with action 0 -> class 0 (bottom), with action 1 -> class 1
+        (top); everything else -> class 2 (neither).
+
+        Args:
+            extrema_pred: Extrema logits; only the first 3 columns are scored
+            rewards: Tensor containing rewards
+            actions: Tensor containing actions
+
+        Returns:
+            Cross-entropy loss tensor, or None if the computation raises (for
+            example when a target class index exceeds the number of columns)
+        """
+        try:
+            batch_size = extrema_pred.size(0)
+
+            # Build targets with boolean masks instead of a per-sample Python loop
+            with torch.no_grad():
+                extrema_targets = torch.full(
+                    (batch_size,), 2, dtype=torch.long, device=extrema_pred.device
+                )  # Default to "neither"
+                flat_actions = actions.reshape(-1).to(extrema_pred.device)
+                # High positive reward suggests we're at a good entry point (potential bottom for BUY, top for SELL)
+                strong_reward = (rewards.reshape(-1) > 0.05).to(extrema_pred.device)
+                extrema_targets[strong_reward & (flat_actions == 0)] = 0  # Bottom (BUY action)
+                extrema_targets[strong_reward & (flat_actions == 1)] = 1  # Top (SELL action)
+
+            # Slicing to 3 columns is a no-op for narrower inputs, so a single
+            # call covers both branches of the previous size check
+            return F.cross_entropy(extrema_pred[:, :3], extrema_targets)
+
+        except Exception as e:
+            logger.debug(f"Error calculating extrema loss: {e}")
+            return None
+
    def get_enhanced_training_stats(self):
        """Get enhanced RL training statistics with detailed metrics (from EnhancedDQNAgent)"""
        return {
--- a/NN/models/enhanced_cnn.py
+++ b/NN/models/enhanced_cnn.py
@@ -265,8 +265,9 @@ class EnhancedCNN(nn.Module):
            nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
        )
        
-        # ULTRA MASSIVE multi-timeframe price prediction heads
-        self.price_pred_immediate = nn.Sequential(
+        # ULTRA MASSIVE price direction prediction head
+        # Outputs single direction and confidence values
+        self.price_direction_head = nn.Sequential(
            nn.Linear(1024, 1024), # Increased from 512
            nn.ReLU(),
            nn.Dropout(0.3),
@@ -275,32 +276,13 @@ class EnhancedCNN(nn.Module):
            nn.Dropout(0.3),
            nn.Linear(512, 256), # Increased from 128
            nn.ReLU(),
-            nn.Linear(256, 3)  # Up, Down, Sideways
+            nn.Linear(256, 2)  # [direction, confidence]
        )
        
-        self.price_pred_midterm = nn.Sequential(
-            nn.Linear(1024, 1024), # Increased from 512
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(1024, 512), # Increased from 256
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(512, 256), # Increased from 128
-            nn.ReLU(),
-            nn.Linear(256, 3)  # Up, Down, Sideways
-        )
-        
-        self.price_pred_longterm = nn.Sequential(
-            nn.Linear(1024, 1024), # Increased from 512
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(1024, 512), # Increased from 256
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(512, 256), # Increased from 128
-            nn.ReLU(),
-            nn.Linear(256, 3)  # Up, Down, Sideways
-        )
+        # Direction activation (tanh for -1 to 1)
+        self.direction_activation = nn.Tanh()
+        # Confidence activation (sigmoid for 0 to 1)
+        self.confidence_activation = nn.Sigmoid()
        
        # ULTRA MASSIVE value prediction with ensemble approaches
        self.price_pred_value = nn.Sequential(
@@ -490,10 +472,14 @@ class EnhancedCNN(nn.Module):
        # Extrema predictions (bottom/top/neither detection)
        extrema_pred = self.extrema_head(features_refined)
        
-        # Multi-timeframe price movement predictions
-        price_immediate = self.price_pred_immediate(features_refined)
-        price_midterm = self.price_pred_midterm(features_refined)
-        price_longterm = self.price_pred_longterm(features_refined)
+        # Price direction predictions
+        price_direction_raw = self.price_direction_head(features_refined)
+        
+        # Apply separate activations to direction and confidence
+        direction = self.direction_activation(price_direction_raw[:, 0:1])  # -1 to 1
+        confidence = self.confidence_activation(price_direction_raw[:, 1:2])  # 0 to 1
+        price_direction_pred = torch.cat([direction, confidence], dim=1)  # [batch, 2]
+        
        price_values = self.price_pred_value(features_refined)
        
        # Additional specialized predictions for enhanced accuracy
@@ -502,15 +488,14 @@ class EnhancedCNN(nn.Module):
        market_regime_pred = self.market_regime_head(features_refined)
        risk_pred = self.risk_head(features_refined)
        
-        # Package all price predictions into a single tensor (use immediate as primary)
-        # For compatibility with DQN agent, we return price_immediate as the price prediction tensor
-        price_pred_tensor = price_immediate
+        # Use the price direction prediction directly (already [batch, 2])
+        price_direction_tensor = price_direction_pred
        
        # Package additional predictions into a single tensor (use volatility as primary)
        # For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
        advanced_pred_tensor = volatility_pred
        
-        return q_values, extrema_pred, price_pred_tensor, features_refined, advanced_pred_tensor
+        return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor
    
    def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
        """Enhanced action selection with ultra massive model predictions"""
@@ -528,7 +513,11 @@ class EnhancedCNN(nn.Module):
                state_tensor = state_tensor.unsqueeze(0)
        
        with torch.no_grad():
-            q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor)
+            q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
+            
+            # Process price direction predictions
+            if price_direction_predictions is not None:
+                self.process_price_direction_predictions(price_direction_predictions)
            
            # Apply softmax to get action probabilities
            action_probs_tensor = torch.softmax(q_values, dim=1)
@@ -565,6 +554,100 @@ class EnhancedCNN(nn.Module):
                logger.info(f"  Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")
            
            return action_idx, confidence, action_probs
+    
+    def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
+        """
+        Read the first sample's direction/confidence pair and cache it on the model.
+
+        Args:
+            price_direction_pred: Tensor indexed as [sample, component]; row 0 is
+                read, with component 0 as direction and component 1 as confidence
+
+        Returns:
+            Dict with 'direction' and 'confidence' floats, also stored as
+            ``self.last_price_direction``; an empty dict when the input is
+            None/empty or extraction raises (the cached value is then unchanged)
+        """
+        unavailable = {}
+        try:
+            if price_direction_pred is None:
+                return unavailable
+            if price_direction_pred.numel() == 0:
+                return unavailable
+
+            first_sample = price_direction_pred[0]
+            processed_directions = {
+                'direction': float(first_sample[0].item()),
+                'confidence': float(first_sample[1].item())
+            }
+            # Keep the latest reading available to the getter methods
+            self.last_price_direction = processed_directions
+            return processed_directions
+        except Exception as e:
+            logger.error(f"Error processing price direction predictions: {e}")
+            return unavailable
+    
+    def get_price_direction_vector(self) -> Dict[str, float]:
+        """
+        Return the most recently cached price-direction prediction.
+
+        Returns:
+            ``self.last_price_direction`` if it has been set, otherwise an empty dict
+        """
+        return getattr(self, 'last_price_direction', {})
+    
+    def get_price_direction_summary(self) -> Dict[str, Any]:
+        """
+        Describe the cached price-direction prediction in discrete terms.
+
+        The direction is bucketed with a +/-0.1 dead zone: above 0.1 is "UP"
+        (1), below -0.1 is "DOWN" (-1), otherwise "SIDEWAYS" (0).
+
+        Returns:
+            Dict with direction_value, confidence_value, direction_label,
+            discrete_direction, strength (|direction|) and weighted_strength
+            (|direction| * confidence). An all-zero "SIDEWAYS" summary is
+            returned when nothing is cached yet or the values cannot be read.
+        """
+        def neutral_summary():
+            # Placeholder used both before the first prediction and on errors
+            return {
+                'direction_value': 0.0,
+                'confidence_value': 0.0,
+                'direction_label': "SIDEWAYS",
+                'discrete_direction': 0,
+                'strength': 0.0,
+                'weighted_strength': 0.0
+            }
+
+        try:
+            cached = getattr(self, 'last_price_direction', {})
+            if not cached:
+                return neutral_summary()
+
+            raw_direction = cached['direction']
+            raw_confidence = cached['confidence']
+
+            # Map the continuous direction onto (label, sign)
+            if raw_direction > 0.1:
+                label, sign = "UP", 1
+            elif raw_direction < -0.1:
+                label, sign = "DOWN", -1
+            else:
+                label, sign = "SIDEWAYS", 0
+            magnitude = abs(float(raw_direction))
+            return {
+                'direction_value': float(raw_direction),
+                'confidence_value': float(raw_confidence),
+                'direction_label': label,
+                'discrete_direction': sign,
+                'strength': magnitude,
+                'weighted_strength': magnitude * float(raw_confidence)
+            }
+
+        except Exception as e:
+            logger.error(f"Error calculating price direction summary: {e}")
+            return neutral_summary()
        
    def save(self, path):
        """Save model weights and architecture"""
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -719,6 +719,95 @@ class TradingOrchestrator:
        except Exception as e:
            logger.error(f"Error initializing ML models: {e}")

+    def _calculate_cnn_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> Optional[torch.Tensor]:
+        """
+        Calculate price direction loss for CNN model
+
+        Targets come from the supplied rewards/actions, not from market data:
+        a sample with reward > 0.01 is labelled +1 (UP) for action 0 and -1
+        (DOWN) for action 1; all other samples are labelled 0. The confidence
+        target is |reward| clamped to [0, 1].
+
+        Args:
+            price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
+            rewards: Tensor of shape [batch] containing rewards
+            actions: Tensor of shape [batch] containing actions
+
+        Returns:
+            Scalar loss (direction MSE + 0.3 * confidence MSE), or None when the
+            prediction does not have 2 columns or the computation raises
+        """
+        try:
+            if price_direction_pred.size(1) != 2:
+                return None
+
+            # Extract direction and confidence predictions
+            direction_pred = price_direction_pred[:, 0]  # -1 to 1
+            confidence_pred = price_direction_pred[:, 1]  # 0 to 1
+
+            # Build targets with whole-batch tensor ops; a per-sample Python
+            # loop forces one host/device round trip per comparison
+            with torch.no_grad():
+                flat_rewards = rewards.reshape(-1)
+                flat_actions = actions.reshape(-1)
+                profitable = (flat_rewards > 0.01).float()
+                # +1 where action == 0 (BUY), -1 where action == 1 (SELL), else 0
+                action_sign = (flat_actions == 0).float() - (flat_actions == 1).float()
+                direction_targets = (profitable * action_sign).to(direction_pred.device)
+
+                # Confidence targets: based on reward magnitude (higher reward = higher confidence)
+                confidence_targets = torch.abs(flat_rewards).clamp(0, 1)
+
+            # Calculate losses for each component
+            direction_loss = nn.MSELoss()(direction_pred, direction_targets)
+            confidence_loss = nn.MSELoss()(confidence_pred, confidence_targets)
+
+            # Combined loss (direction is more important than confidence)
+            return direction_loss + 0.3 * confidence_loss
+
+        except Exception as e:
+            logger.debug(f"Error calculating CNN price direction loss: {e}")
+            return None
+    
+    def _calculate_cnn_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> Optional[torch.Tensor]:
+        """
+        Calculate extrema loss for CNN model
+
+        Class targets are inferred from the supplied rewards/actions: reward
+        above 0.05 with action 0 -> class 0 (bottom), with action 1 -> class 1
+        (top); everything else -> class 2 (neither).
+
+        Args:
+            extrema_pred: Extrema logits; only the first 3 columns are scored
+            rewards: Tensor containing rewards
+            actions: Tensor containing actions
+
+        Returns:
+            Cross-entropy loss tensor, or None if the computation raises (for
+            example when a target class index exceeds the number of columns)
+        """
+        try:
+            batch_size = extrema_pred.size(0)
+
+            # Build targets with boolean masks instead of a per-sample Python loop
+            with torch.no_grad():
+                extrema_targets = torch.full(
+                    (batch_size,), 2, dtype=torch.long, device=extrema_pred.device
+                )  # Default to "neither"
+                flat_actions = actions.reshape(-1).to(extrema_pred.device)
+                # High positive reward suggests we're at a good entry point
+                strong_reward = (rewards.reshape(-1) > 0.05).to(extrema_pred.device)
+                extrema_targets[strong_reward & (flat_actions == 0)] = 0  # Bottom (BUY action)
+                extrema_targets[strong_reward & (flat_actions == 1)] = 1  # Top (SELL action)
+
+            # Slicing to 3 columns is a no-op for narrower inputs, so a single
+            # call covers both branches of the previous size check
+            return nn.CrossEntropyLoss()(extrema_pred[:, :3], extrema_targets)
+
+        except Exception as e:
+            logger.debug(f"Error calculating CNN extrema loss: {e}")
+            return None
+
    def update_model_loss(self, model_name: str, current_loss: float, best_loss: Optional[float] = None):
        """Update model loss and potentially best loss"""
        if model_name in self.model_states:
@@ -1938,7 +2027,71 @@ class TradingOrchestrator:
            # Evaluate the previous prediction and train the model immediately
            await self._evaluate_and_train_on_record(inference_record, current_price)
            
-            logger.info(f"Completed immediate training for {model_name}")
+            # Log predicted vs actual outcome
+            prediction = inference_record.get('prediction', {})
+            predicted_action = prediction.get('action', 'UNKNOWN')
+            predicted_confidence = prediction.get('confidence', 0.0)
+            
+            # Calculate actual outcome
+            symbol = inference_record.get('symbol', 'ETH/USDT')
+            predicted_price = None
+            actual_price_change_pct = 0.0
+            
+            # Try to get price direction vectors from metadata (new format)
+            if 'price_direction' in prediction and prediction['price_direction']:
+                try:
+                    price_direction_data = prediction['price_direction']
+                    # Process price direction data
+                    if isinstance(price_direction_data, dict) and 'direction' in price_direction_data:
+                        direction = price_direction_data['direction']
+                        confidence = price_direction_data.get('confidence', 1.0)
+                        
+                        # Convert direction to price change percentage
+                        # Scale by confidence and direction strength
+                        predicted_price_change_pct = direction * confidence * 0.02  # 2% max change
+                        predicted_price = current_price * (1 + predicted_price_change_pct)
+                except Exception as e:
+                    logger.debug(f"Error processing price direction data: {e}")
+            
+            # Fallback to old price prediction format
+            elif 'price_prediction' in prediction and prediction['price_prediction']:
+                try:
+                    price_prediction_data = prediction['price_prediction']
+                    if isinstance(price_prediction_data, list) and len(price_prediction_data) > 0:
+                        predicted_price_change_pct = float(price_prediction_data[0]) * 0.01
+                        predicted_price = current_price * (1 + predicted_price_change_pct)
+                except Exception:
+                    pass
+            
+            # Calculate price change
+            if predicted_price is not None:
+                actual_price_change_pct = (current_price - predicted_price) / predicted_price * 100
+                price_outcome = f"Predicted: ${predicted_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
+            else:
+                # Fall back to historical price comparison
+                historical_data = self.data_provider.get_historical_data(symbol, '1m', limit=10)
+                if historical_data is not None and not historical_data.empty:
+                    historical_price = historical_data['close'].iloc[-1]
+                    actual_price_change_pct = (current_price - historical_price) / historical_price * 100
+                    price_outcome = f"Historical: ${historical_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
+                else:
+                    price_outcome = f"Actual: ${current_price:.2f}"
+            
+            # Determine if prediction was correct based on action and price movement
+            was_correct = False
+            if predicted_action == 'BUY' and actual_price_change_pct > 0.1:  # Price went up
+                was_correct = True
+            elif predicted_action == 'SELL' and actual_price_change_pct < -0.1:  # Price went down
+                was_correct = True
+            elif predicted_action == 'HOLD' and abs(actual_price_change_pct) < 0.5:  # Price stayed stable
+                was_correct = True
+            
+            outcome_status = "✅ CORRECT" if was_correct else "❌ INCORRECT"
+            
+            logger.info(f"Completed immediate training for {model_name} - {outcome_status}")
+            logger.info(f"  Prediction: {predicted_action} ({predicted_confidence:.3f})")
+            logger.info(f"  {price_outcome}")
+            logger.info(f"  Outcome: {outcome_status}")
            
        except Exception as e:
            logger.error(f"Error in immediate training for {model_name}: {e}")
@@ -2412,12 +2565,33 @@ class TradingOrchestrator:
                    self.cnn_optimizer.zero_grad()
                    
                    # Forward pass
-                    q_values, extrema_pred, price_pred, features_refined, advanced_pred = self.cnn_model(features_tensor)
+                    q_values, extrema_pred, price_direction_pred, features_refined, advanced_pred = self.cnn_model(features_tensor)
                    
-                    # Calculate loss
+                    # Calculate primary Q-value loss
                    q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1)
                    target_q = reward_tensor  # Simplified target
-                    loss = nn.MSELoss()(q_values_selected, target_q)
+                    q_loss = nn.MSELoss()(q_values_selected, target_q)
+                    
+                    # Calculate auxiliary losses for price direction and extrema
+                    total_loss = q_loss
+                    
+                    # Price direction loss
+                    if price_direction_pred is not None and price_direction_pred.shape[0] > 0:
+                        price_direction_loss = self._calculate_cnn_price_direction_loss(
+                            price_direction_pred, reward_tensor, action_tensor
+                        )
+                        if price_direction_loss is not None:
+                            total_loss = total_loss + 0.2 * price_direction_loss
+                    
+                    # Extrema loss
+                    if extrema_pred is not None and extrema_pred.shape[0] > 0:
+                        extrema_loss = self._calculate_cnn_extrema_loss(
+                            extrema_pred, reward_tensor, action_tensor
+                        )
+                        if extrema_loss is not None:
+                            total_loss = total_loss + 0.1 * extrema_loss
+                    
+                    loss = total_loss
                    
                    # Backward pass
                    training_start_time = time.time()
@@ -2640,9 +2814,17 @@ class TradingOrchestrator:
                            'HOLD': float(action_probs[0, 2].item())
                        }
                        
-                        # Extract price predictions if available
-                        price_prediction = None
+                        # Extract price direction predictions if available
+                        price_direction_data = None
                        if price_pred is not None:
+                            # Process price direction predictions
+                            if hasattr(model.model, 'process_price_direction_predictions'):
+                                try:
+                                    price_direction_data = model.model.process_price_direction_predictions(price_pred)
+                                except Exception as e:
+                                    logger.debug(f"Error processing CNN price direction: {e}")
+                            
+                            # Fallback to old format for compatibility
                            price_prediction = price_pred.squeeze(0).cpu().numpy().tolist()
                        
                        prediction = Prediction(
@@ -2656,6 +2838,7 @@ class TradingOrchestrator:
                                'feature_size': len(base_data.get_feature_vector()),
                                'data_sources': ['ohlcv_1s', 'ohlcv_1m', 'ohlcv_1h', 'ohlcv_1d', 'btc', 'cob', 'indicators'],
                                'price_prediction': price_prediction,
+                                'price_direction': price_direction_data,
                                'extrema_prediction': extrema_pred.squeeze(0).cpu().numpy().tolist() if extrema_pred is not None else None
                            }
                        )
@@ -2694,6 +2877,14 @@ class TradingOrchestrator:
                        action_names = ['BUY', 'SELL', 'HOLD']  # Note: enhanced_cnn uses this order
                        best_action = action_names[action_idx]
                        
+                        # Get price direction vectors from CNN model if available
+                        price_direction_data = None
+                        if hasattr(model.model, 'get_price_direction_vector'):
+                            try:
+                                price_direction_data = model.model.get_price_direction_vector()
+                            except Exception as e:
+                                logger.debug(f"Error getting price direction from CNN: {e}")
+                        
                        pred = Prediction(
                            action=best_action,
                            confidence=float(confidence),
@@ -2708,7 +2899,8 @@ class TradingOrchestrator:
                            metadata={
                                'feature_vector_size': len(feature_vector),
                                'unified_input': True,
-                                'fallback_method': 'direct_model_inference'
+                                'fallback_method': 'direct_model_inference',
+                                'price_direction': price_direction_data
                            }
                        )
                        predictions.append(pred)
@@ -2811,6 +3003,14 @@ class TradingOrchestrator:
                if q_values_for_capture:
                    logger.warning(f"Q-values length mismatch: expected {len(action_names)}, got {len(q_values_for_capture)}. Using default probabilities.")
            
+            # Get price direction vectors from DQN model if available
+            price_direction_data = None
+            if hasattr(model.model, 'get_price_direction_vector'):
+                try:
+                    price_direction_data = model.model.get_price_direction_vector()
+                except Exception as e:
+                    logger.debug(f"Error getting price direction from DQN: {e}")
+            
            prediction = Prediction(
                action=action,
                confidence=float(confidence),
@@ -2818,7 +3018,10 @@ class TradingOrchestrator:
                timeframe='mixed',  # RL uses mixed timeframes
                timestamp=datetime.now(),
                model_name=model.name,
-                metadata={'state_size': len(state)}
+                metadata={
+                    'state_size': len(state),
+                    'price_direction': price_direction_data
+                }
            )
            
            # Capture DQN prediction for dashboard visualization