PnL in reward, show leveraged power in dash (broken)

Dobromir Popov
2025-07-29 17:42:00 +03:00
parent d35530a9e9
commit 3a532a1220
5 changed files with 553 additions and 49 deletions

View File

@ -3,6 +3,7 @@ import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import time
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
@ -652,20 +653,30 @@ class EnhancedCNN(nn.Module):
'weighted_strength': 0.0
}
def add_training_data(self, state, action, reward):
def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
"""
Add training data to the model's training buffer
Add training data to the model's training buffer with position-based reward enhancement
Args:
state: Input state
action: Action taken
reward: Reward received
reward: Base reward received
position_pnl: Current position P&L (0.0 if no position)
has_position: Whether we currently have an open position
"""
try:
# Enhance reward based on position status
enhanced_reward = self._calculate_position_enhanced_reward(
reward, action, position_pnl, has_position
)
self.training_data.append({
'state': state,
'action': action,
'reward': reward,
'reward': enhanced_reward,
'base_reward': reward, # Keep original reward for analysis
'position_pnl': position_pnl,
'has_position': has_position,
'timestamp': time.time()
})
@ -675,6 +686,51 @@ class EnhancedCNN(nn.Module):
except Exception as e:
logger.error(f"Error adding training data: {e}")
def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
"""
Calculate a position-enhanced reward that incentivizes letting winners run and closing losing positions
Args:
base_reward: Original reward from price prediction accuracy
action: Action taken ('BUY', 'SELL', 'HOLD')
position_pnl: Current position P&L
has_position: Whether we have an open position
Returns:
Enhanced reward that incentivizes profitable behavior
"""
try:
enhanced_reward = base_reward
if has_position and position_pnl != 0.0:
# Position-based reward adjustments
pnl_factor = position_pnl / 100.0  # Normalize P&L to a reasonable scale
if position_pnl > 0: # Profitable position
if action == "HOLD":
# Reward holding profitable positions (let winners run)
enhanced_reward += abs(pnl_factor) * 0.5
elif action in ["BUY", "SELL"]:
# Moderate reward for taking action on profitable positions
enhanced_reward += abs(pnl_factor) * 0.3
elif position_pnl < 0: # Losing position
if action == "HOLD":
# Penalty for holding losing positions (cut losses)
enhanced_reward -= abs(pnl_factor) * 0.8
elif action in ["BUY", "SELL"]:
# Reward for taking action to close losing positions
enhanced_reward += abs(pnl_factor) * 0.6
# Ensure reward doesn't become extreme
enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
return enhanced_reward
except Exception as e:
logger.error(f"Error calculating position-enhanced reward: {e}")
return base_reward
def save(self, path):
"""Save model weights and architecture"""

View File

@ -3267,12 +3267,15 @@ class TradingOrchestrator:
avg_loss = model_stats.average_loss if model_stats else None
# Calculate reward for logging
current_pnl = self._get_current_position_pnl(self.symbol)
reward, _ = self._calculate_sophisticated_reward(
predicted_action,
predicted_confidence,
actual_price_change_pct,
time_diff_seconds / 60, # Convert to minutes
has_price_prediction=predicted_price is not None,
symbol=self.symbol,
current_position_pnl=current_pnl,
)
# Enhanced logging with detailed information
@ -3361,6 +3364,7 @@ class TradingOrchestrator:
) # Default to 0.5 if missing
# Calculate sophisticated reward based on multiple factors
current_pnl = self._get_current_position_pnl(symbol)
reward, was_correct = self._calculate_sophisticated_reward(
predicted_action,
prediction_confidence,
@ -3369,6 +3373,7 @@ class TradingOrchestrator:
inference_price is not None, # Add price prediction flag
symbol, # Pass symbol for position lookup
None, # Let method determine position status
current_position_pnl=current_pnl,
)
# Update model performance tracking
@ -3476,10 +3481,11 @@ class TradingOrchestrator:
has_price_prediction: bool = False,
symbol: str = None,
has_position: bool = None,
current_position_pnl: float = 0.0,
) -> tuple[float, bool]:
"""
Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
Now considers position status when evaluating HOLD decisions
Now considers position status and current P&L when evaluating decisions
Args:
predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@ -3489,6 +3495,7 @@ class TradingOrchestrator:
has_price_prediction: Whether the model made a price prediction
symbol: Trading symbol (for position lookup)
has_position: Whether we currently have a position (if None, will be looked up)
current_position_pnl: Current unrealized P&L of open position (0.0 if no position)
Returns:
tuple: (reward, was_correct)
@ -3500,6 +3507,9 @@ class TradingOrchestrator:
# Determine current position status if not provided
if has_position is None and symbol:
has_position = self._has_open_position(symbol)
# Get current position P&L if we have a position
if has_position and current_position_pnl == 0.0:
current_position_pnl = self._get_current_position_pnl(symbol)
elif has_position is None:
has_position = False
@ -3518,19 +3528,37 @@ class TradingOrchestrator:
0, -price_change_pct
) # Positive for downward movement
elif predicted_action == "HOLD":
# HOLD evaluation now considers position status
# HOLD evaluation now considers position status AND current P&L
if has_position:
# If we have a position, HOLD is correct if price moved favorably or stayed stable
# This prevents penalizing HOLD when we're already in a profitable position
if price_change_pct > 0: # Price went up while holding - good
was_correct = True
directional_accuracy = price_change_pct # Reward based on profit
elif abs(price_change_pct) < movement_threshold: # Price stable - neutral
was_correct = True
directional_accuracy = movement_threshold - abs(price_change_pct)
else: # Price dropped while holding - bad, but less penalty than wrong direction
was_correct = False
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
# If we have a position, HOLD evaluation depends on P&L and price movement
if current_position_pnl > 0: # Currently profitable position
# Holding a profitable position is good if price continues favorably
if price_change_pct > 0: # Price went up while holding profitable position - excellent
was_correct = True
directional_accuracy = price_change_pct * 1.5 # Bonus for holding winners
elif abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold + (current_position_pnl / 100.0) # Reward based on existing profit
else: # Price dropped while holding profitable position - still okay but less reward
was_correct = True
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
elif current_position_pnl < 0: # Currently losing position
# Holding a losing position is generally bad - should consider closing
if price_change_pct > movement_threshold: # Price recovered - good hold
was_correct = True
directional_accuracy = price_change_pct * 0.8 # Reduced reward for recovery
else: # Price continued down or stayed flat - bad hold
was_correct = False
# Penalty proportional to loss magnitude
directional_accuracy = abs(current_position_pnl / 100.0) * 0.5 # Penalty for holding losers
else: # Breakeven position
# Standard HOLD evaluation for breakeven positions
if abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold - abs(price_change_pct)
else: # Price moved significantly - missed opportunity
was_correct = False
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
else:
# If we don't have a position, HOLD is correct if price stayed relatively stable
was_correct = abs(price_change_pct) < movement_threshold
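The position-aware HOLD scoring above condenses to the following standalone sketch. The branch structure and multipliers mirror the diff; the function name and the 0.1% default movement_threshold are illustrative assumptions (the real threshold is computed earlier in the method).

def hold_accuracy(position_pnl, price_change_pct, movement_threshold=0.1):
    """Sketch: returns (was_correct, directional_accuracy) for a HOLD decision."""
    if position_pnl > 0:          # profitable position: holding is acceptable, best if price keeps rising
        if price_change_pct > 0:
            return True, price_change_pct * 1.5                                    # bonus for holding winners
        if abs(price_change_pct) < movement_threshold:
            return True, movement_threshold + position_pnl / 100.0                 # credit the existing profit
        return True, max(0.0, position_pnl / 100.0 - abs(price_change_pct) * 0.5)
    if position_pnl < 0:          # losing position: holding only pays off on a real recovery
        if price_change_pct > movement_threshold:
            return True, price_change_pct * 0.8
        return False, abs(position_pnl / 100.0) * 0.5                              # treated as a penalty downstream
    # breakeven position: standard "was the market actually flat?" check
    if abs(price_change_pct) < movement_threshold:
        return True, movement_threshold - abs(price_change_pct)
    return False, max(0.0, movement_threshold - abs(price_change_pct)) * 0.7

# e.g. hold_accuracy(40.0, 0.5) -> (True, 0.75);  hold_accuracy(-40.0, -0.2) -> (False, 0.2)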
@ -3627,12 +3655,16 @@ class TradingOrchestrator:
# Calculate reward if not provided
if sophisticated_reward is None:
symbol = record.get("symbol", self.symbol)
current_pnl = self._get_current_position_pnl(symbol)
sophisticated_reward, _ = self._calculate_sophisticated_reward(
record.get("action", "HOLD"),
record.get("confidence", 0.5),
price_change_pct,
record.get("time_diff_minutes", 1.0),
record.get("has_price_prediction", False),
symbol=symbol,
current_position_pnl=current_pnl,
)
# Train decision fusion model if it's the model being evaluated
@ -6510,7 +6542,7 @@ class TradingOrchestrator:
logger.error(f"Error getting combined model data for {symbol}: {e}")
return None
def _get_current_position_pnl(self, symbol: str, current_price: float) -> float:
def _get_current_position_pnl(self, symbol: str, current_price: float = None) -> float:
"""Get current position P&L for the symbol"""
try:
if self.trading_executor and hasattr(
@ -6518,16 +6550,22 @@ class TradingOrchestrator:
):
position = self.trading_executor.get_current_position(symbol)
if position:
entry_price = position.get("price", 0)
size = position.get("size", 0)
side = position.get("side", "LONG")
# If current_price is provided, calculate P&L manually
if current_price is not None:
entry_price = position.get("price", 0)
size = position.get("size", 0)
side = position.get("side", "LONG")
if entry_price and size > 0:
if side.upper() == "LONG":
pnl = (current_price - entry_price) * size
else: # SHORT
pnl = (entry_price - current_price) * size
return pnl
if entry_price and size > 0:
if side.upper() == "LONG":
pnl = (current_price - entry_price) * size
else: # SHORT
pnl = (entry_price - current_price) * size
return pnl
else:
# Use unrealized_pnl from position if available
if position.get("size", 0) > 0:
return float(position.get("unrealized_pnl", 0.0))
return 0.0
except Exception as e:
logger.debug(f"Error getting position P&L for {symbol}: {e}")
@ -6545,6 +6583,53 @@ class TradingOrchestrator:
except Exception:
return False
def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):
"""
Calculate a position-enhanced reward for the DQN that incentivizes letting winners run and closing losing positions
Args:
base_reward: Original reward from confidence/execution
action: Action taken ('BUY', 'SELL', 'HOLD')
position_pnl: Current position P&L
has_position: Whether we have an open position
Returns:
Enhanced reward that incentivizes profitable behavior
"""
try:
enhanced_reward = base_reward
if has_position and position_pnl != 0.0:
# Position-based reward adjustments (similar to CNN but tuned for DQN)
pnl_factor = position_pnl / 100.0  # Normalize P&L to a reasonable scale
if position_pnl > 0: # Profitable position
if action == "HOLD":
# Reward holding profitable positions (let winners run)
enhanced_reward += abs(pnl_factor) * 0.4
elif action in ["BUY", "SELL"]:
# Moderate reward for taking action on profitable positions
enhanced_reward += abs(pnl_factor) * 0.2
elif position_pnl < 0: # Losing position
if action == "HOLD":
# Strong penalty for holding losing positions (cut losses)
enhanced_reward -= abs(pnl_factor) * 1.0
elif action in ["BUY", "SELL"]:
# Strong reward for taking action to close losing positions
enhanced_reward += abs(pnl_factor) * 0.8
# Ensure reward doesn't become extreme (DQN is more sensitive to reward scale)
enhanced_reward = max(-2.0, min(2.0, enhanced_reward))
return enhanced_reward
except Exception as e:
logger.error(f"Error calculating position-enhanced reward for DQN: {e}")
return base_reward
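Compared with the CNN variant, the DQN adjustment keeps the same structure but scales differently: smaller bonuses for riding winners (0.4 / 0.2 vs 0.5 / 0.3), harsher terms around losers (1.0 / 0.8 vs 0.8 / 0.6), and a tighter clamp. Rough worked values with base_reward = 0.5 and a $100 open loss:

#   HOLD the -$100 position:  CNN: 0.5 - 1.0 * 0.8 = -0.30    DQN: 0.5 - 1.0 * 1.0 = -0.50
#   SELL the -$100 position:  CNN: 0.5 + 1.0 * 0.6 = +1.10    DQN: 0.5 + 1.0 * 0.8 = +1.30
#   (results are then clamped to [-5.0, 5.0] for the CNN and [-2.0, 2.0] for the DQN)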
def _close_all_positions(self):
"""Close all open positions when clearing session"""
try:
@ -6889,28 +6974,35 @@ class TradingOrchestrator:
action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
dqn_action = action_mapping.get(action, 2)
# Calculate immediate reward based on confidence and execution
immediate_reward = confidence if action != "HOLD" else 0.0
# Get position information for enhanced rewards
has_position = self._has_open_position(symbol)
position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
# Calculate position-enhanced reward
base_reward = confidence if action != "HOLD" else 0.1
enhanced_reward = self._calculate_position_enhanced_reward_for_dqn(
base_reward, action, position_pnl, has_position
)
# Add experience to DQN
self.rl_agent.remember(
state=state,
action=dqn_action,
reward=immediate_reward,
reward=enhanced_reward,
next_state=state, # Will be updated with actual outcome later
done=False,
)
models_trained.append("dqn")
logger.debug(
f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
f"🧠 Added DQN experience: {action} {symbol} (reward: {enhanced_reward:.3f}, P&L: ${position_pnl:.2f})"
)
except Exception as e:
logger.debug(f"Error training DQN on decision: {e}")
# Train CNN model if available and enabled
if self.cnn_model and hasattr(self.cnn_model, "add_training_sample") and self.is_model_training_enabled("cnn"):
if self.cnn_model and hasattr(self.cnn_model, "add_training_data") and self.is_model_training_enabled("cnn"):
try:
# Create CNN input features from base_data (same as inference)
cnn_features = self._create_cnn_features_from_base_data(
@ -6919,19 +7011,30 @@ class TradingOrchestrator:
# Create target based on action
target_mapping = {
"BUY": [1, 0, 0],
"SELL": [0, 1, 0],
"HOLD": [0, 0, 1],
"BUY": 0, # Action indices for CNN
"SELL": 1,
"HOLD": 2,
}
target = target_mapping.get(action, [0, 0, 1])
target_action = target_mapping.get(action, 2)
# Add training sample
self.cnn_model.add_training_sample(
cnn_features, target, weight=confidence
# Get position information for enhanced rewards
has_position = self._has_open_position(symbol)
position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
# Calculate base reward from confidence and add position-based enhancement
base_reward = confidence if action != "HOLD" else 0.1
# Add training data with position-based reward enhancement
self.cnn_model.add_training_data(
cnn_features,
target_action,
base_reward,
position_pnl=position_pnl,
has_position=has_position
)
models_trained.append("cnn")
logger.debug(f"🔍 Added CNN training sample: {action} {symbol}")
logger.debug(f"🔍 Added CNN training sample: {action} {symbol} (P&L: ${position_pnl:.2f})")
except Exception as e:
logger.debug(f"Error training CNN on decision: {e}")

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Test script to debug dashboard data flow issues
This script tests if the dashboard can properly retrieve and display model data.
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import logging
logging.basicConfig(level=logging.DEBUG)
from web.clean_dashboard import CleanTradingDashboard
from core.orchestrator import TradingOrchestrator
from core.data_provider import DataProvider
def test_dashboard_data_flow():
"""Test if dashboard can retrieve model data correctly"""
print("🧪 DASHBOARD DATA FLOW TEST")
print("=" * 50)
try:
# Initialize components
data_provider = DataProvider()
orchestrator = TradingOrchestrator(data_provider=data_provider)
print(f"✅ Orchestrator initialized")
print(f" Model registry models: {list(orchestrator.model_registry.get_all_models().keys())}")
print(f" Model toggle states: {list(orchestrator.model_toggle_states.keys())}")
# Initialize dashboard
dashboard = CleanTradingDashboard(
data_provider=data_provider,
orchestrator=orchestrator
)
print(f"✅ Dashboard initialized")
# Test available models
available_models = dashboard._get_available_models()
print(f" Available models: {list(available_models.keys())}")
# Test training metrics
print("\n📊 Testing training metrics...")
toggle_states = {}
for model_name in available_models.keys():
toggle_states[model_name] = orchestrator.get_model_toggle_state(model_name)
print(f" Toggle states: {list(toggle_states.keys())}")
metrics_data = dashboard._get_training_metrics(toggle_states)
print(f" Metrics data type: {type(metrics_data)}")
if metrics_data and isinstance(metrics_data, dict):
print(f" Metrics keys: {list(metrics_data.keys())}")
if 'loaded_models' in metrics_data:
loaded_models = metrics_data['loaded_models']
print(f" Loaded models count: {len(loaded_models)}")
for model_name, model_info in loaded_models.items():
print(f" - {model_name}: active={model_info.get('active', False)}")
else:
print(" ❌ No 'loaded_models' in metrics_data!")
else:
print(f" ❌ Invalid metrics_data: {metrics_data}")
# Test component manager formatting
print("\n🎨 Testing component manager...")
formatted_components = dashboard.component_manager.format_training_metrics(metrics_data)
print(f" Formatted components type: {type(formatted_components)}")
print(f" Formatted components count: {len(formatted_components) if formatted_components else 0}")
if formatted_components:
print(" ✅ Component manager returned formatted data")
else:
print(" ❌ Component manager returned empty data")
print("\n🚀 Dashboard data flow test completed!")
return True
except Exception as e:
print(f"❌ Test failed with error: {e}")
import traceback
traceback.print_exc()
return False
if __name__ == "__main__":
test_dashboard_data_flow()

View File

@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
Test script for position-based reward system
This script tests the enhanced reward calculations that incentivize:
1. Holding profitable positions (let winners run)
2. Closing losing positions (cut losses)
3. Taking action when appropriate based on P&L
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from core.orchestrator import TradingOrchestrator
from NN.models.enhanced_cnn import EnhancedCNN
import numpy as np
def test_position_reward_scenarios():
"""Test various position-based reward scenarios"""
print("🧪 POSITION-BASED REWARD SYSTEM TEST")
print("=" * 50)
# Initialize orchestrator
orchestrator = TradingOrchestrator()
# Test scenarios
scenarios = [
# (action, position_pnl, has_position, price_change_pct, description)
("HOLD", 50.0, True, 0.5, "Hold profitable position with continued gains"),
("HOLD", 50.0, True, -0.3, "Hold profitable position with small pullback"),
("HOLD", -30.0, True, 0.8, "Hold losing position that recovers"),
("HOLD", -30.0, True, -0.5, "Hold losing position that continues down"),
("SELL", 50.0, True, 0.0, "Close profitable position"),
("SELL", -30.0, True, 0.0, "Close losing position (good)"),
("BUY", 0.0, False, 1.0, "New buy position with immediate gain"),
("HOLD", 0.0, False, 0.1, "Hold with no position (stable market)"),
]
print("\n📊 SOPHISTICATED REWARD CALCULATION TESTS:")
print("-" * 80)
for i, (action, position_pnl, has_position, price_change_pct, description) in enumerate(scenarios, 1):
# Test sophisticated reward calculation
reward, was_correct = orchestrator._calculate_sophisticated_reward(
predicted_action=action,
prediction_confidence=0.8,
price_change_pct=price_change_pct,
time_diff_minutes=5.0,
has_price_prediction=False,
symbol="ETH/USDT",
has_position=has_position,
current_position_pnl=position_pnl
)
print(f"{i:2d}. {description}")
print(f" Action: {action}, P&L: ${position_pnl:+.1f}, Price Change: {price_change_pct:+.1f}%")
print(f" Reward: {reward:+.3f}, Correct: {was_correct}")
print()
print("\n🧠 CNN POSITION-ENHANCED REWARD TESTS:")
print("-" * 80)
# Initialize CNN model
cnn_model = EnhancedCNN(input_shape=100, n_actions=3)
for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
base_reward = 0.5 # Moderate base reward
enhanced_reward = cnn_model._calculate_position_enhanced_reward(
base_reward=base_reward,
action=action,
position_pnl=position_pnl,
has_position=has_position
)
enhancement = enhanced_reward - base_reward
print(f"{i:2d}. {description}")
print(f" Action: {action}, P&L: ${position_pnl:+.1f}")
print(f" Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f}{enhancement:+.3f})")
print()
print("\n🤖 DQN POSITION-ENHANCED REWARD TESTS:")
print("-" * 80)
for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
base_reward = 0.5 # Moderate base reward
enhanced_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(
base_reward=base_reward,
action=action,
position_pnl=position_pnl,
has_position=has_position
)
enhancement = enhanced_reward - base_reward
print(f"{i:2d}. {description}")
print(f" Action: {action}, P&L: ${position_pnl:+.1f}")
print(f" Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f}{enhancement:+.3f})")
print()
def test_reward_incentives():
"""Test that rewards properly incentivize desired behaviors"""
print("\n🎯 REWARD INCENTIVE VALIDATION:")
print("-" * 50)
orchestrator = TradingOrchestrator()
cnn_model = EnhancedCNN(input_shape=100, n_actions=3)
# Test 1: Holding winners vs holding losers
print("1. HOLD action comparison:")
hold_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", 100.0, True)
hold_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
print(f" Hold profitable position (+$100): {hold_winner_reward:+.3f}")
print(f" Hold losing position (-$100): {hold_loser_reward:+.3f}")
print(f" ✅ Incentive correct: {hold_winner_reward > hold_loser_reward}")
# Test 2: Closing losers vs closing winners
print("\n2. SELL action comparison:")
sell_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", 100.0, True)
sell_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", -100.0, True)
print(f" Sell profitable position (+$100): {sell_winner_reward:+.3f}")
print(f" Sell losing position (-$100): {sell_loser_reward:+.3f}")
print(f" ✅ Incentive correct: {sell_loser_reward > sell_winner_reward}")
# Test 3: DQN reward scaling
print("\n3. DQN vs CNN reward scaling:")
dqn_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(0.5, "HOLD", -100.0, True)
cnn_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
print(f" DQN penalty for holding loser: {dqn_reward:+.3f}")
print(f" CNN penalty for holding loser: {cnn_reward:+.3f}")
print(f" ✅ DQN more sensitive: {abs(dqn_reward) > abs(cnn_reward)}")
def main():
"""Run all position-based reward tests"""
try:
test_position_reward_scenarios()
test_reward_incentives()
print("\n🚀 POSITION-BASED REWARD SYSTEM VALIDATION COMPLETE!")
print("✅ System properly incentivizes:")
print(" • Holding profitable positions (let winners run)")
print(" • Closing losing positions (cut losses)")
print(" • Taking appropriate action based on P&L")
print(" • Different reward scaling for CNN vs DQN models")
except Exception as e:
print(f"❌ Test failed with error: {e}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()
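Hand-deriving the validation cases from the adjustment code above (base_reward = 0.5, ±$100 P&L) gives the following expected values; they come from the factors in the diff, not from an actual run:

#   CNN HOLD +$100: 0.5 + 1.0 * 0.5 = +1.00    CNN HOLD -$100: 0.5 - 1.0 * 0.8 = -0.30   -> winners beat losers
#   CNN SELL +$100: 0.5 + 1.0 * 0.3 = +0.80    CNN SELL -$100: 0.5 + 1.0 * 0.6 = +1.10   -> closing the loser pays more
#   DQN HOLD -$100: 0.5 - 1.0 * 1.0 = -0.50                                              -> larger penalty than the CNN's -0.30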

View File

@ -964,6 +964,7 @@ class CleanTradingDashboard:
)
def update_metrics(n):
"""Update key metrics - ENHANCED with position sync monitoring"""
logger.debug(f"update_metrics callback triggered (n={n})")
try:
# PERIODIC POSITION SYNC: Every 30 seconds, verify position sync
if n % 30 == 0 and n > 0: # Skip initial load (n=0)
@ -1102,7 +1103,14 @@ class CleanTradingDashboard:
# For simulation, show starting balance + session P&L
current_balance = self._cached_live_balance if hasattr(self, '_cached_live_balance') else self._get_initial_balance()
portfolio_value = current_balance + total_session_pnl # Live balance + unrealized P&L
portfolio_str = f"${portfolio_value:.2f}"
# Add max position info to portfolio display
try:
max_position_info = self._calculate_max_position_display()
portfolio_str = f"${portfolio_value:.2f} | {max_position_info}"
except Exception as e:
logger.error(f"Error calculating max position display: {e}")
portfolio_str = f"${portfolio_value:.2f}"
# Profitability multiplier - get from trading executor
profitability_multiplier = 0.0
@ -1352,6 +1360,11 @@ class CleanTradingDashboard:
logger.debug(f"Metrics data keys: {list(metrics_data.keys())}")
if 'loaded_models' in metrics_data:
logger.debug(f"Loaded models count: {len(metrics_data['loaded_models'])}")
logger.debug(f"Loaded model names: {list(metrics_data['loaded_models'].keys())}")
else:
logger.warning("No 'loaded_models' key in metrics_data!")
else:
logger.warning(f"Invalid metrics_data: {metrics_data}")
return self.component_manager.format_training_metrics(metrics_data)
except PreventUpdate:
raise
@ -1646,6 +1659,38 @@ class CleanTradingDashboard:
logger.debug(f"Error calculating opening fee: {e}")
return position_size_usd * 0.0006 # Fallback to 0.06%
def _calculate_max_position_display(self) -> str:
"""Calculate and display maximum position size based on current balance and leverage"""
try:
# Get current balance
current_balance = self._get_live_account_balance()
if current_balance <= 0:
return "No Balance"
# Get current leverage
leverage = getattr(self, 'current_leverage', 50) # Default to 50x
# Get current price for ETH/USDT
current_price = self._get_current_price('ETH/USDT')
if not current_price or current_price <= 0:
return "Price N/A"
# Calculate maximum position value (balance * leverage)
max_position_value = current_balance * leverage
# Calculate maximum ETH quantity
max_eth_quantity = max_position_value / current_price
# Format display
if max_eth_quantity >= 0.01: # Show in ETH if >= 0.01
return f"${max_position_value:.1f} ({max_eth_quantity:.2f} ETH)"
else:
return f"${max_position_value:.1f} ({max_eth_quantity:.4f} ETH)"
except Exception as e:
logger.debug(f"Error calculating max position display: {e}")
return "Calc Error"
def _calculate_closing_fee(self, current_price: float, quantity: float) -> float:
"""Calculate closing fee for a position at current price"""
try:
@ -3532,11 +3577,22 @@ class CleanTradingDashboard:
if self.orchestrator and hasattr(self.orchestrator, 'get_model_states'):
try:
model_states = self.orchestrator.get_model_states()
logger.debug(f"Retrieved model states from orchestrator: {model_states}")
logger.debug(f"Retrieved model states from orchestrator: {list(model_states.keys()) if model_states else 'None'}")
except Exception as e:
logger.error(f"Error getting model states from orchestrator: {e}")
model_states = None
# Also try to get orchestrator statistics for debugging
if self.orchestrator:
try:
all_stats = self.orchestrator.get_model_statistics()
if all_stats:
logger.debug(f"Available orchestrator statistics: {list(all_stats.keys())}")
else:
logger.debug("No orchestrator statistics available")
except Exception as e:
logger.debug(f"Error getting orchestrator statistics: {e}")
# Fallback if orchestrator not available or returns None
if model_states is None:
logger.warning("No model states available from orchestrator, using fallback")
@ -3549,6 +3605,26 @@ class CleanTradingDashboard:
'decision': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
}
# Create mapping for model states to handle both old and new model names
if model_states and self.orchestrator:
# Map new registry names to old dashboard names for compatibility
registry_to_dashboard_mapping = {
'dqn_agent': 'dqn',
'enhanced_cnn': 'cnn',
'cob_rl_model': 'cob_rl',
'decision_fusion': 'decision_fusion',
'transformer': 'transformer'
}
# Copy states from new names to old names if they exist
for registry_name, dashboard_name in registry_to_dashboard_mapping.items():
if registry_name in model_states and dashboard_name not in model_states:
model_states[dashboard_name] = model_states[registry_name]
logger.debug(f"Mapped model state {registry_name} -> {dashboard_name}")
elif dashboard_name not in model_states:
# Ensure we have a state for the dashboard name
model_states[dashboard_name] = {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
# Get latest predictions from all models
latest_predictions = self._get_latest_model_predictions()
cnn_prediction = self._get_cnn_pivot_prediction()
@ -3598,6 +3674,23 @@ class CleanTradingDashboard:
"transformer": {"inference_enabled": True, "training_enabled": True}
}
# Create mapping for backward compatibility between old dashboard names and new registry names
model_name_mapping = {
'dqn': 'dqn_agent',
'cnn': 'enhanced_cnn',
'cob_rl': 'cob_rl_model',
'decision_fusion': 'decision_fusion',
'transformer': 'transformer'
}
# Ensure we have toggle states for the old names used by the dashboard
for old_name, new_name in model_name_mapping.items():
if old_name not in toggle_states and new_name in toggle_states:
toggle_states[old_name] = toggle_states[new_name]
elif old_name not in toggle_states:
# Default state if neither old nor new name exists
toggle_states[old_name] = {"inference_enabled": True, "training_enabled": True}
# Helper function to safely calculate improvement percentage
def safe_improvement_calc(initial, current, default_improvement=0.0):
try:
@ -3705,8 +3798,8 @@ class CleanTradingDashboard:
last_confidence = 0.68
last_timestamp = datetime.now().strftime('%H:%M:%S')
# Get real DQN statistics from orchestrator
dqn_stats = orchestrator_stats.get('dqn_agent')
# Get real DQN statistics from orchestrator (try both old and new names)
dqn_stats = orchestrator_stats.get('dqn_agent') or orchestrator_stats.get('dqn')
dqn_current_loss = dqn_stats.current_loss if dqn_stats else None
dqn_best_loss = dqn_stats.best_loss if dqn_stats else None
dqn_accuracy = dqn_stats.accuracy if dqn_stats else None
@ -3786,8 +3879,8 @@ class CleanTradingDashboard:
cnn_state = model_states.get('cnn', {})
cnn_timing = get_model_timing_info('CNN')
# Get real CNN statistics from orchestrator
cnn_stats = orchestrator_stats.get('enhanced_cnn')
# Get real CNN statistics from orchestrator (try both old and new names)
cnn_stats = orchestrator_stats.get('enhanced_cnn') or orchestrator_stats.get('cnn')
cnn_active = cnn_stats is not None
# Get latest CNN prediction from orchestrator statistics
@ -4153,12 +4246,15 @@ class CleanTradingDashboard:
# DEBUG: Log what we're returning
models_count = len(metrics.get('loaded_models', {}))
logger.info(f"Training metrics being returned: {models_count} models loaded")
logger.debug(f"Training metrics being returned: {models_count} models loaded")
if models_count == 0:
logger.warning("No models in loaded_models!")
logger.warning(f"Metrics keys: {list(metrics.keys())}")
for model_name, model_info in metrics.get('loaded_models', {}).items():
logger.info(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")
logger.warning(f"Model states available: {list(model_states.keys()) if model_states else 'None'}")
logger.warning(f"Toggle states available: {list(toggle_states.keys()) if toggle_states else 'None'}")
else:
for model_name, model_info in metrics.get('loaded_models', {}).items():
logger.debug(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")
return metrics