PnL in reward, show leveraged power in dash (broken)

Dobromir Popov
2025-07-29 17:42:00 +03:00
parent d35530a9e9
commit 3a532a1220
5 changed files with 553 additions and 49 deletions

View File

@@ -3,6 +3,7 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 import os
+import time
 import logging
 import torch.nn.functional as F
 from typing import List, Tuple, Dict, Any, Optional, Union
@@ -652,20 +653,30 @@ class EnhancedCNN(nn.Module):
             'weighted_strength': 0.0
         }

-    def add_training_data(self, state, action, reward):
+    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
         """
-        Add training data to the model's training buffer
+        Add training data to the model's training buffer with position-based reward enhancement

         Args:
             state: Input state
             action: Action taken
-            reward: Reward received
+            reward: Base reward received
+            position_pnl: Current position P&L (0.0 if no position)
+            has_position: Whether we currently have an open position
         """
         try:
+            # Enhance reward based on position status
+            enhanced_reward = self._calculate_position_enhanced_reward(
+                reward, action, position_pnl, has_position
+            )
+
             self.training_data.append({
                 'state': state,
                 'action': action,
-                'reward': reward,
+                'reward': enhanced_reward,
+                'base_reward': reward,  # Keep original reward for analysis
+                'position_pnl': position_pnl,
+                'has_position': has_position,
                 'timestamp': time.time()
             })
@@ -675,6 +686,51 @@ class EnhancedCNN(nn.Module):
         except Exception as e:
             logger.error(f"Error adding training data: {e}")

+    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from price prediction accuracy
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to a reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.5
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.3
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 0.8
+                    elif action in ["BUY", "SELL"]:
+                        # Reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.6
+
+            # Ensure reward doesn't become extreme
+            enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward: {e}")
+            return base_reward
+
     def save(self, path):
         """Save model weights and architecture"""

View File

@@ -3267,12 +3267,15 @@ class TradingOrchestrator:
                 avg_loss = model_stats.average_loss if model_stats else None

                 # Calculate reward for logging
+                current_pnl = self._get_current_position_pnl(self.symbol)
                 reward, _ = self._calculate_sophisticated_reward(
                     predicted_action,
                     predicted_confidence,
                     actual_price_change_pct,
                     time_diff_seconds / 60,  # Convert to minutes
                     has_price_prediction=predicted_price is not None,
+                    symbol=self.symbol,
+                    current_position_pnl=current_pnl,
                 )

                 # Enhanced logging with detailed information
@@ -3361,6 +3364,7 @@ class TradingOrchestrator:
            )  # Default to 0.5 if missing

            # Calculate sophisticated reward based on multiple factors
+           current_pnl = self._get_current_position_pnl(symbol)
            reward, was_correct = self._calculate_sophisticated_reward(
                predicted_action,
                prediction_confidence,
@@ -3369,6 +3373,7 @@ class TradingOrchestrator:
                inference_price is not None,  # Add price prediction flag
                symbol,  # Pass symbol for position lookup
                None,  # Let method determine position status
+               current_position_pnl=current_pnl,
            )

            # Update model performance tracking
@@ -3476,10 +3481,11 @@ class TradingOrchestrator:
        has_price_prediction: bool = False,
        symbol: str = None,
        has_position: bool = None,
+       current_position_pnl: float = 0.0,
    ) -> tuple[float, bool]:
        """
        Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
-       Now considers position status when evaluating HOLD decisions
+       Now considers position status and current P&L when evaluating decisions

        Args:
            predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3489,6 +3495,7 @@ class TradingOrchestrator:
            has_price_prediction: Whether the model made a price prediction
            symbol: Trading symbol (for position lookup)
            has_position: Whether we currently have a position (if None, will be looked up)
+           current_position_pnl: Current unrealized P&L of open position (0.0 if no position)

        Returns:
            tuple: (reward, was_correct)
@@ -3500,6 +3507,9 @@ class TradingOrchestrator:
            # Determine current position status if not provided
            if has_position is None and symbol:
                has_position = self._has_open_position(symbol)
+               # Get current position P&L if we have a position
+               if has_position and current_position_pnl == 0.0:
+                   current_position_pnl = self._get_current_position_pnl(symbol)
            elif has_position is None:
                has_position = False
@@ -3518,19 +3528,37 @@ class TradingOrchestrator:
                    0, -price_change_pct
                )  # Positive for downward movement
            elif predicted_action == "HOLD":
-               # HOLD evaluation now considers position status
+               # HOLD evaluation now considers position status AND current P&L
                if has_position:
-                   # If we have a position, HOLD is correct if price moved favorably or stayed stable
-                   # This prevents penalizing HOLD when we're already in a profitable position
-                   if price_change_pct > 0:  # Price went up while holding - good
-                       was_correct = True
-                       directional_accuracy = price_change_pct  # Reward based on profit
-                   elif abs(price_change_pct) < movement_threshold:  # Price stable - neutral
-                       was_correct = True
-                       directional_accuracy = movement_threshold - abs(price_change_pct)
-                   else:  # Price dropped while holding - bad, but less penalty than wrong direction
-                       was_correct = False
-                       directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
+                   # If we have a position, HOLD evaluation depends on P&L and price movement
+                   if current_position_pnl > 0:  # Currently profitable position
+                       # Holding a profitable position is good if price continues favorably
+                       if price_change_pct > 0:  # Price went up while holding profitable position - excellent
+                           was_correct = True
+                           directional_accuracy = price_change_pct * 1.5  # Bonus for holding winners
+                       elif abs(price_change_pct) < movement_threshold:  # Price stable - good
+                           was_correct = True
+                           directional_accuracy = movement_threshold + (current_position_pnl / 100.0)  # Reward based on existing profit
+                       else:  # Price dropped while holding profitable position - still okay but less reward
+                           was_correct = True
+                           directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
+                   elif current_position_pnl < 0:  # Currently losing position
+                       # Holding a losing position is generally bad - should consider closing
+                       if price_change_pct > movement_threshold:  # Price recovered - good hold
+                           was_correct = True
+                           directional_accuracy = price_change_pct * 0.8  # Reduced reward for recovery
+                       else:  # Price continued down or stayed flat - bad hold
+                           was_correct = False
+                           # Penalty proportional to loss magnitude
+                           directional_accuracy = abs(current_position_pnl / 100.0) * 0.5  # Penalty for holding losers
+                   else:  # Breakeven position
+                       # Standard HOLD evaluation for breakeven positions
+                       if abs(price_change_pct) < movement_threshold:  # Price stable - good
+                           was_correct = True
+                           directional_accuracy = movement_threshold - abs(price_change_pct)
+                       else:  # Price moved significantly - missed opportunity
+                           was_correct = False
+                           directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
                else:
                    # If we don't have a position, HOLD is correct if price stayed relatively stable
                    was_correct = abs(price_change_pct) < movement_threshold
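Note: a short sketch of what the new HOLD branch produces for a few cases, with assumed inputs (movement_threshold = 0.1, P&L in USD); only the was_correct / directional_accuracy pair is shown, the rest of the reward calculation is unchanged:

movement_threshold = 0.1  # assumed value for illustration
# HOLD, +$40 open profit, price up another 0.5%:
print(True, 0.5 * 1.5)                        # directional_accuracy = 0.75 (bonus for holding a winner)
# HOLD, -$40 open loss, price keeps falling:
print(False, abs(-40 / 100.0) * 0.5)          # directional_accuracy = 0.20, but scored as incorrect
# HOLD, breakeven position, price moves only -0.05%:
print(True, movement_threshold - abs(-0.05))  # directional_accuracy = 0.05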
@@ -3627,12 +3655,16 @@ class TradingOrchestrator:
            # Calculate reward if not provided
            if sophisticated_reward is None:
+               symbol = record.get("symbol", self.symbol)
+               current_pnl = self._get_current_position_pnl(symbol)
                sophisticated_reward, _ = self._calculate_sophisticated_reward(
                    record.get("action", "HOLD"),
                    record.get("confidence", 0.5),
                    price_change_pct,
                    record.get("time_diff_minutes", 1.0),
                    record.get("has_price_prediction", False),
+                   symbol=symbol,
+                   current_position_pnl=current_pnl,
                )

            # Train decision fusion model if it's the model being evaluated
@@ -6510,7 +6542,7 @@ class TradingOrchestrator:
            logger.error(f"Error getting combined model data for {symbol}: {e}")
            return None

-    def _get_current_position_pnl(self, symbol: str, current_price: float) -> float:
+    def _get_current_position_pnl(self, symbol: str, current_price: float = None) -> float:
        """Get current position P&L for the symbol"""
        try:
            if self.trading_executor and hasattr(
@@ -6518,16 +6550,22 @@ class TradingOrchestrator:
            ):
                position = self.trading_executor.get_current_position(symbol)
                if position:
-                   entry_price = position.get("price", 0)
-                   size = position.get("size", 0)
-                   side = position.get("side", "LONG")
+                   # If current_price is provided, calculate P&L manually
+                   if current_price is not None:
+                       entry_price = position.get("price", 0)
+                       size = position.get("size", 0)
+                       side = position.get("side", "LONG")
                        if entry_price and size > 0:
                            if side.upper() == "LONG":
                                pnl = (current_price - entry_price) * size
                            else:  # SHORT
                                pnl = (entry_price - current_price) * size
                            return pnl
+                   else:
+                       # Use unrealized_pnl from position if available
+                       if position.get("size", 0) > 0:
+                           return float(position.get("unrealized_pnl", 0.0))
            return 0.0
        except Exception as e:
            logger.debug(f"Error getting position P&L for {symbol}: {e}")
@@ -6545,6 +6583,53 @@ class TradingOrchestrator:
        except Exception:
            return False

+    def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward for DQN to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from confidence/execution
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments (similar to CNN but tuned for DQN)
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to a reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.4
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.2
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Strong penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 1.0
+                    elif action in ["BUY", "SELL"]:
+                        # Strong reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.8
+
+            # Ensure reward doesn't become extreme (DQN is more sensitive to reward scale)
+            enhanced_reward = max(-2.0, min(2.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward for DQN: {e}")
+            return base_reward
+
    def _close_all_positions(self):
        """Close all open positions when clearing session"""
        try:
@@ -6889,28 +6974,35 @@ class TradingOrchestrator:
                    action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
                    dqn_action = action_mapping.get(action, 2)

-                   # Calculate immediate reward based on confidence and execution
-                   immediate_reward = confidence if action != "HOLD" else 0.0
+                   # Get position information for enhanced rewards
+                   has_position = self._has_open_position(symbol)
+                   position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                   # Calculate position-enhanced reward
+                   base_reward = confidence if action != "HOLD" else 0.1
+                   enhanced_reward = self._calculate_position_enhanced_reward_for_dqn(
+                       base_reward, action, position_pnl, has_position
+                   )

                    # Add experience to DQN
                    self.rl_agent.remember(
                        state=state,
                        action=dqn_action,
-                       reward=immediate_reward,
+                       reward=enhanced_reward,
                        next_state=state,  # Will be updated with actual outcome later
                        done=False,
                    )

                    models_trained.append("dqn")
                    logger.debug(
-                       f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
+                       f"🧠 Added DQN experience: {action} {symbol} (reward: {enhanced_reward:.3f}, P&L: ${position_pnl:.2f})"
                    )

                except Exception as e:
                    logger.debug(f"Error training DQN on decision: {e}")

            # Train CNN model if available and enabled
-           if self.cnn_model and hasattr(self.cnn_model, "add_training_sample") and self.is_model_training_enabled("cnn"):
+           if self.cnn_model and hasattr(self.cnn_model, "add_training_data") and self.is_model_training_enabled("cnn"):
                try:
                    # Create CNN input features from base_data (same as inference)
                    cnn_features = self._create_cnn_features_from_base_data(
@@ -6919,19 +7011,30 @@ class TradingOrchestrator:
                    # Create target based on action
                    target_mapping = {
-                       "BUY": [1, 0, 0],
-                       "SELL": [0, 1, 0],
-                       "HOLD": [0, 0, 1],
+                       "BUY": 0,  # Action indices for CNN
+                       "SELL": 1,
+                       "HOLD": 2,
                    }
-                   target = target_mapping.get(action, [0, 0, 1])
+                   target_action = target_mapping.get(action, 2)

-                   # Add training sample
-                   self.cnn_model.add_training_sample(
-                       cnn_features, target, weight=confidence
+                   # Get position information for enhanced rewards
+                   has_position = self._has_open_position(symbol)
+                   position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                   # Calculate base reward from confidence and add position-based enhancement
+                   base_reward = confidence if action != "HOLD" else 0.1
+
+                   # Add training data with position-based reward enhancement
+                   self.cnn_model.add_training_data(
+                       cnn_features,
+                       target_action,
+                       base_reward,
+                       position_pnl=position_pnl,
+                       has_position=has_position
                    )

                    models_trained.append("cnn")
-                   logger.debug(f"🔍 Added CNN training sample: {action} {symbol}")
+                   logger.debug(f"🔍 Added CNN training sample: {action} {symbol} (P&L: ${position_pnl:.2f})")

                except Exception as e:
                    logger.debug(f"Error training CNN on decision: {e}")

View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Test script to debug dashboard data flow issues

This script tests if the dashboard can properly retrieve and display model data.
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import logging
logging.basicConfig(level=logging.DEBUG)

from web.clean_dashboard import CleanTradingDashboard
from core.orchestrator import TradingOrchestrator
from core.data_provider import DataProvider

def test_dashboard_data_flow():
    """Test if dashboard can retrieve model data correctly"""
    print("🧪 DASHBOARD DATA FLOW TEST")
    print("=" * 50)

    try:
        # Initialize components
        data_provider = DataProvider()
        orchestrator = TradingOrchestrator(data_provider=data_provider)

        print(f"✅ Orchestrator initialized")
        print(f"   Model registry models: {list(orchestrator.model_registry.get_all_models().keys())}")
        print(f"   Model toggle states: {list(orchestrator.model_toggle_states.keys())}")

        # Initialize dashboard
        dashboard = CleanTradingDashboard(
            data_provider=data_provider,
            orchestrator=orchestrator
        )
        print(f"✅ Dashboard initialized")

        # Test available models
        available_models = dashboard._get_available_models()
        print(f"   Available models: {list(available_models.keys())}")

        # Test training metrics
        print("\n📊 Testing training metrics...")
        toggle_states = {}
        for model_name in available_models.keys():
            toggle_states[model_name] = orchestrator.get_model_toggle_state(model_name)

        print(f"   Toggle states: {list(toggle_states.keys())}")

        metrics_data = dashboard._get_training_metrics(toggle_states)
        print(f"   Metrics data type: {type(metrics_data)}")

        if metrics_data and isinstance(metrics_data, dict):
            print(f"   Metrics keys: {list(metrics_data.keys())}")

            if 'loaded_models' in metrics_data:
                loaded_models = metrics_data['loaded_models']
                print(f"   Loaded models count: {len(loaded_models)}")

                for model_name, model_info in loaded_models.items():
                    print(f"     - {model_name}: active={model_info.get('active', False)}")
            else:
                print("   ❌ No 'loaded_models' in metrics_data!")
        else:
            print(f"   ❌ Invalid metrics_data: {metrics_data}")

        # Test component manager formatting
        print("\n🎨 Testing component manager...")
        formatted_components = dashboard.component_manager.format_training_metrics(metrics_data)
        print(f"   Formatted components type: {type(formatted_components)}")
        print(f"   Formatted components count: {len(formatted_components) if formatted_components else 0}")

        if formatted_components:
            print("   ✅ Component manager returned formatted data")
        else:
            print("   ❌ Component manager returned empty data")

        print("\n🚀 Dashboard data flow test completed!")
        return True

    except Exception as e:
        print(f"❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    test_dashboard_data_flow()

View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
Test script for position-based reward system

This script tests the enhanced reward calculations that incentivize:
1. Holding profitable positions (let winners run)
2. Closing losing positions (cut losses)
3. Taking action when appropriate based on P&L
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from core.orchestrator import TradingOrchestrator
from NN.models.enhanced_cnn import EnhancedCNN
import numpy as np

def test_position_reward_scenarios():
    """Test various position-based reward scenarios"""
    print("🧪 POSITION-BASED REWARD SYSTEM TEST")
    print("=" * 50)

    # Initialize orchestrator
    orchestrator = TradingOrchestrator()

    # Test scenarios
    scenarios = [
        # (action, position_pnl, has_position, price_change_pct, description)
        ("HOLD", 50.0, True, 0.5, "Hold profitable position with continued gains"),
        ("HOLD", 50.0, True, -0.3, "Hold profitable position with small pullback"),
        ("HOLD", -30.0, True, 0.8, "Hold losing position that recovers"),
        ("HOLD", -30.0, True, -0.5, "Hold losing position that continues down"),
        ("SELL", 50.0, True, 0.0, "Close profitable position"),
        ("SELL", -30.0, True, 0.0, "Close losing position (good)"),
        ("BUY", 0.0, False, 1.0, "New buy position with immediate gain"),
        ("HOLD", 0.0, False, 0.1, "Hold with no position (stable market)"),
    ]

    print("\n📊 SOPHISTICATED REWARD CALCULATION TESTS:")
    print("-" * 80)

    for i, (action, position_pnl, has_position, price_change_pct, description) in enumerate(scenarios, 1):
        # Test sophisticated reward calculation
        reward, was_correct = orchestrator._calculate_sophisticated_reward(
            predicted_action=action,
            prediction_confidence=0.8,
            price_change_pct=price_change_pct,
            time_diff_minutes=5.0,
            has_price_prediction=False,
            symbol="ETH/USDT",
            has_position=has_position,
            current_position_pnl=position_pnl
        )

        print(f"{i:2d}. {description}")
        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}, Price Change: {price_change_pct:+.1f}%")
        print(f"    Reward: {reward:+.3f}, Correct: {was_correct}")
        print()

    print("\n🧠 CNN POSITION-ENHANCED REWARD TESTS:")
    print("-" * 80)

    # Initialize CNN model
    cnn_model = EnhancedCNN(input_shape=100, n_actions=3)

    for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
        base_reward = 0.5  # Moderate base reward
        enhanced_reward = cnn_model._calculate_position_enhanced_reward(
            base_reward=base_reward,
            action=action,
            position_pnl=position_pnl,
            has_position=has_position
        )

        enhancement = enhanced_reward - base_reward
        print(f"{i:2d}. {description}")
        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}")
        print(f"    Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f} (Δ{enhancement:+.3f})")
        print()

    print("\n🤖 DQN POSITION-ENHANCED REWARD TESTS:")
    print("-" * 80)

    for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
        base_reward = 0.5  # Moderate base reward
        enhanced_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(
            base_reward=base_reward,
            action=action,
            position_pnl=position_pnl,
            has_position=has_position
        )

        enhancement = enhanced_reward - base_reward
        print(f"{i:2d}. {description}")
        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}")
        print(f"    Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f} (Δ{enhancement:+.3f})")
        print()

def test_reward_incentives():
    """Test that rewards properly incentivize desired behaviors"""
    print("\n🎯 REWARD INCENTIVE VALIDATION:")
    print("-" * 50)

    orchestrator = TradingOrchestrator()
    cnn_model = EnhancedCNN(input_shape=100, n_actions=3)

    # Test 1: Holding winners vs holding losers
    print("1. HOLD action comparison:")
    hold_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", 100.0, True)
    hold_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
    print(f"   Hold profitable position (+$100): {hold_winner_reward:+.3f}")
    print(f"   Hold losing position (-$100): {hold_loser_reward:+.3f}")
    print(f"   ✅ Incentive correct: {hold_winner_reward > hold_loser_reward}")

    # Test 2: Closing losers vs closing winners
    print("\n2. SELL action comparison:")
    sell_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", 100.0, True)
    sell_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", -100.0, True)
    print(f"   Sell profitable position (+$100): {sell_winner_reward:+.3f}")
    print(f"   Sell losing position (-$100): {sell_loser_reward:+.3f}")
    print(f"   ✅ Incentive correct: {sell_loser_reward > sell_winner_reward}")

    # Test 3: DQN reward scaling
    print("\n3. DQN vs CNN reward scaling:")
    dqn_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(0.5, "HOLD", -100.0, True)
    cnn_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
    print(f"   DQN penalty for holding loser: {dqn_reward:+.3f}")
    print(f"   CNN penalty for holding loser: {cnn_reward:+.3f}")
    print(f"   ✅ DQN more sensitive: {abs(dqn_reward) > abs(cnn_reward)}")

def main():
    """Run all position-based reward tests"""
    try:
        test_position_reward_scenarios()
        test_reward_incentives()

        print("\n🚀 POSITION-BASED REWARD SYSTEM VALIDATION COMPLETE!")
        print("✅ System properly incentivizes:")
        print("   • Holding profitable positions (let winners run)")
        print("   • Closing losing positions (cut losses)")
        print("   • Taking appropriate action based on P&L")
        print("   • Different reward scaling for CNN vs DQN models")

    except Exception as e:
        print(f"❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()
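Note: the incentive ordering the script asserts can also be checked by hand from the constants in this commit (a sketch; no model or orchestrator needed):

# Hold a -$100 loser with base reward 0.5 (pnl_factor = 1.0):
cnn_hold_loser = max(-5.0, min(5.0, 0.5 - 1.0 * 0.8))   # -0.30, CNN clamp is [-5, 5]
dqn_hold_loser = max(-2.0, min(2.0, 0.5 - 1.0 * 1.0))   # -0.50, DQN clamp is [-2, 2]
# Hold a +$100 winner with the CNN shaping:
cnn_hold_winner = 0.5 + 1.0 * 0.5                        # +1.00
print(cnn_hold_winner > cnn_hold_loser)                  # True: winners preferred over losers
print(abs(dqn_hold_loser) > abs(cnn_hold_loser))         # True: DQN penalizes holding losers harder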

View File

@@ -964,6 +964,7 @@ class CleanTradingDashboard:
        )
        def update_metrics(n):
            """Update key metrics - ENHANCED with position sync monitoring"""
+           logger.debug(f"update_metrics callback triggered (n={n})")
            try:
                # PERIODIC POSITION SYNC: Every 30 seconds, verify position sync
                if n % 30 == 0 and n > 0:  # Skip initial load (n=0)
@@ -1102,7 +1103,14 @@ class CleanTradingDashboard:
                    # For simulation, show starting balance + session P&L
                    current_balance = self._cached_live_balance if hasattr(self, '_cached_live_balance') else self._get_initial_balance()
                    portfolio_value = current_balance + total_session_pnl  # Live balance + unrealized P&L
-                   portfolio_str = f"${portfolio_value:.2f}"
+
+                   # Add max position info to portfolio display
+                   try:
+                       max_position_info = self._calculate_max_position_display()
+                       portfolio_str = f"${portfolio_value:.2f} | {max_position_info}"
+                   except Exception as e:
+                       logger.error(f"Error calculating max position display: {e}")
+                       portfolio_str = f"${portfolio_value:.2f}"

                # Profitability multiplier - get from trading executor
                profitability_multiplier = 0.0
@@ -1352,6 +1360,11 @@ class CleanTradingDashboard:
                    logger.debug(f"Metrics data keys: {list(metrics_data.keys())}")
                    if 'loaded_models' in metrics_data:
                        logger.debug(f"Loaded models count: {len(metrics_data['loaded_models'])}")
+                       logger.debug(f"Loaded model names: {list(metrics_data['loaded_models'].keys())}")
+                   else:
+                       logger.warning("No 'loaded_models' key in metrics_data!")
+               else:
+                   logger.warning(f"Invalid metrics_data: {metrics_data}")
                return self.component_manager.format_training_metrics(metrics_data)
            except PreventUpdate:
                raise
@@ -1646,6 +1659,38 @@ class CleanTradingDashboard:
            logger.debug(f"Error calculating opening fee: {e}")
            return position_size_usd * 0.0006  # Fallback to 0.06%

+    def _calculate_max_position_display(self) -> str:
+        """Calculate and display maximum position size based on current balance and leverage"""
+        try:
+            # Get current balance
+            current_balance = self._get_live_account_balance()
+            if current_balance <= 0:
+                return "No Balance"
+
+            # Get current leverage
+            leverage = getattr(self, 'current_leverage', 50)  # Default to 50x
+
+            # Get current price for ETH/USDT
+            current_price = self._get_current_price('ETH/USDT')
+            if not current_price or current_price <= 0:
+                return "Price N/A"
+
+            # Calculate maximum position value (balance * leverage)
+            max_position_value = current_balance * leverage
+
+            # Calculate maximum ETH quantity
+            max_eth_quantity = max_position_value / current_price
+
+            # Format display
+            if max_eth_quantity >= 0.01:  # Show in ETH if >= 0.01
+                return f"${max_position_value:.1f} ({max_eth_quantity:.2f} ETH)"
+            else:
+                return f"${max_position_value:.1f} ({max_eth_quantity:.4f} ETH)"
+
+        except Exception as e:
+            logger.debug(f"Error calculating max position display: {e}")
+            return "Calc Error"
+
    def _calculate_closing_fee(self, current_price: float, quantity: float) -> float:
        """Calculate closing fee for a position at current price"""
        try:
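Note: the "leveraged power" figure added to the portfolio string is simply balance * leverage; a worked example with assumed numbers (a $100 balance, the 50x default, ETH at $2,500):

balance = 100.0                                    # assumed live account balance in USD
leverage = 50                                      # default used by the method above
eth_price = 2500.0                                 # assumed ETH/USDT price
max_position_value = balance * leverage            # 5000.0 USD of buying power
max_eth_quantity = max_position_value / eth_price  # 2.0 ETH
print(f"${max_position_value:.1f} ({max_eth_quantity:.2f} ETH)")  # $5000.0 (2.00 ETH)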
@@ -3532,11 +3577,22 @@ class CleanTradingDashboard:
        if self.orchestrator and hasattr(self.orchestrator, 'get_model_states'):
            try:
                model_states = self.orchestrator.get_model_states()
-               logger.debug(f"Retrieved model states from orchestrator: {model_states}")
+               logger.debug(f"Retrieved model states from orchestrator: {list(model_states.keys()) if model_states else 'None'}")
            except Exception as e:
                logger.error(f"Error getting model states from orchestrator: {e}")
                model_states = None

+       # Also try to get orchestrator statistics for debugging
+       if self.orchestrator:
+           try:
+               all_stats = self.orchestrator.get_model_statistics()
+               if all_stats:
+                   logger.debug(f"Available orchestrator statistics: {list(all_stats.keys())}")
+               else:
+                   logger.debug("No orchestrator statistics available")
+           except Exception as e:
+               logger.debug(f"Error getting orchestrator statistics: {e}")
+
        # Fallback if orchestrator not available or returns None
        if model_states is None:
            logger.warning("No model states available from orchestrator, using fallback")
@@ -3549,6 +3605,26 @@ class CleanTradingDashboard:
                'decision': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
            }

+       # Create mapping for model states to handle both old and new model names
+       if model_states and self.orchestrator:
+           # Map new registry names to old dashboard names for compatibility
+           registry_to_dashboard_mapping = {
+               'dqn_agent': 'dqn',
+               'enhanced_cnn': 'cnn',
+               'cob_rl_model': 'cob_rl',
+               'decision_fusion': 'decision_fusion',
+               'transformer': 'transformer'
+           }
+
+           # Copy states from new names to old names if they exist
+           for registry_name, dashboard_name in registry_to_dashboard_mapping.items():
+               if registry_name in model_states and dashboard_name not in model_states:
+                   model_states[dashboard_name] = model_states[registry_name]
+                   logger.debug(f"Mapped model state {registry_name} -> {dashboard_name}")
+               elif dashboard_name not in model_states:
+                   # Ensure we have a state for the dashboard name
+                   model_states[dashboard_name] = {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
+
        # Get latest predictions from all models
        latest_predictions = self._get_latest_model_predictions()
        cnn_prediction = self._get_cnn_pivot_prediction()
@@ -3598,6 +3674,23 @@ class CleanTradingDashboard:
                "transformer": {"inference_enabled": True, "training_enabled": True}
            }

+       # Create mapping for backward compatibility between old dashboard names and new registry names
+       model_name_mapping = {
+           'dqn': 'dqn_agent',
+           'cnn': 'enhanced_cnn',
+           'cob_rl': 'cob_rl_model',
+           'decision_fusion': 'decision_fusion',
+           'transformer': 'transformer'
+       }
+
+       # Ensure we have toggle states for the old names used by the dashboard
+       for old_name, new_name in model_name_mapping.items():
+           if old_name not in toggle_states and new_name in toggle_states:
+               toggle_states[old_name] = toggle_states[new_name]
+           elif old_name not in toggle_states:
+               # Default state if neither old nor new name exists
+               toggle_states[old_name] = {"inference_enabled": True, "training_enabled": True}
+
        # Helper function to safely calculate improvement percentage
        def safe_improvement_calc(initial, current, default_improvement=0.0):
            try:
@@ -3705,8 +3798,8 @@ class CleanTradingDashboard:
            last_confidence = 0.68
            last_timestamp = datetime.now().strftime('%H:%M:%S')

-       # Get real DQN statistics from orchestrator
-       dqn_stats = orchestrator_stats.get('dqn_agent')
+       # Get real DQN statistics from orchestrator (try both old and new names)
+       dqn_stats = orchestrator_stats.get('dqn_agent') or orchestrator_stats.get('dqn')
        dqn_current_loss = dqn_stats.current_loss if dqn_stats else None
        dqn_best_loss = dqn_stats.best_loss if dqn_stats else None
        dqn_accuracy = dqn_stats.accuracy if dqn_stats else None
@@ -3786,8 +3879,8 @@ class CleanTradingDashboard:
        cnn_state = model_states.get('cnn', {})
        cnn_timing = get_model_timing_info('CNN')

-       # Get real CNN statistics from orchestrator
-       cnn_stats = orchestrator_stats.get('enhanced_cnn')
+       # Get real CNN statistics from orchestrator (try both old and new names)
+       cnn_stats = orchestrator_stats.get('enhanced_cnn') or orchestrator_stats.get('cnn')
        cnn_active = cnn_stats is not None

        # Get latest CNN prediction from orchestrator statistics
@@ -4153,12 +4246,15 @@ class CleanTradingDashboard:
        # DEBUG: Log what we're returning
        models_count = len(metrics.get('loaded_models', {}))
-       logger.info(f"Training metrics being returned: {models_count} models loaded")
+       logger.debug(f"Training metrics being returned: {models_count} models loaded")
        if models_count == 0:
            logger.warning("No models in loaded_models!")
            logger.warning(f"Metrics keys: {list(metrics.keys())}")
-       for model_name, model_info in metrics.get('loaded_models', {}).items():
-           logger.info(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")
+           logger.warning(f"Model states available: {list(model_states.keys()) if model_states else 'None'}")
+           logger.warning(f"Toggle states available: {list(toggle_states.keys()) if toggle_states else 'None'}")
+       else:
+           for model_name, model_info in metrics.get('loaded_models', {}).items():
+               logger.debug(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")

        return metrics