From 3a532a12200298afdfcccc858665a36f0b66b204 Mon Sep 17 00:00:00 2001
From: Dobromir Popov
Date: Tue, 29 Jul 2025 17:42:00 +0300
Subject: [PATCH] PnL in reward, show leveraged power in dash (broken)

---
 NN/models/enhanced_cnn.py      |  64 +++++++++++-
 core/orchestrator.py           | 175 ++++++++++++++++++++++++++-------
 test_dashboard_data_flow.py    |  90 +++++++++++++++++
 test_position_based_rewards.py | 159 ++++++++++++++++++++++++++++++
 web/clean_dashboard.py         | 114 +++++++++++++++++++--
 5 files changed, 553 insertions(+), 49 deletions(-)
 create mode 100644 test_dashboard_data_flow.py
 create mode 100644 test_position_based_rewards.py

diff --git a/NN/models/enhanced_cnn.py b/NN/models/enhanced_cnn.py
index 67ad4c7..789b324 100644
--- a/NN/models/enhanced_cnn.py
+++ b/NN/models/enhanced_cnn.py
@@ -3,6 +3,7 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 import os
+import time
 import logging
 import torch.nn.functional as F
 from typing import List, Tuple, Dict, Any, Optional, Union
@@ -652,20 +653,30 @@ class EnhancedCNN(nn.Module):
             'weighted_strength': 0.0
         }
 
-    def add_training_data(self, state, action, reward):
+    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
         """
-        Add training data to the model's training buffer
+        Add training data to the model's training buffer with position-based reward enhancement
 
         Args:
             state: Input state
             action: Action taken
-            reward: Reward received
+            reward: Base reward received
+            position_pnl: Current position P&L (0.0 if no position)
+            has_position: Whether we currently have an open position
         """
         try:
+            # Enhance reward based on position status
+            enhanced_reward = self._calculate_position_enhanced_reward(
+                reward, action, position_pnl, has_position
+            )
+
             self.training_data.append({
                 'state': state,
                 'action': action,
-                'reward': reward,
+                'reward': enhanced_reward,
+                'base_reward': reward,  # Keep original reward for analysis
+                'position_pnl': position_pnl,
+                'has_position': has_position,
                 'timestamp': time.time()
             })
@@ -675,6 +686,51 @@ class EnhancedCNN(nn.Module):
         except Exception as e:
             logger.error(f"Error adding training data: {e}")
+
+    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from price prediction accuracy
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.5
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.3
+
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 0.8
+                    elif action in ["BUY", "SELL"]:
+                        # Reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.6
+
+            # Ensure reward doesn't become extreme
+            enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward: {e}")
+            return base_reward
 
     def save(self, path):
         """Save model weights and architecture"""
diff --git a/core/orchestrator.py b/core/orchestrator.py
index 8f9e277..27ff60c 100644
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -3267,12 +3267,15 @@ class TradingOrchestrator:
             avg_loss = model_stats.average_loss if model_stats else None
 
             # Calculate reward for logging
+            current_pnl = self._get_current_position_pnl(self.symbol)
             reward, _ = self._calculate_sophisticated_reward(
                 predicted_action,
                 predicted_confidence,
                 actual_price_change_pct,
                 time_diff_seconds / 60,  # Convert to minutes
                 has_price_prediction=predicted_price is not None,
+                symbol=self.symbol,
+                current_position_pnl=current_pnl,
             )
 
             # Enhanced logging with detailed information
@@ -3361,6 +3364,7 @@ class TradingOrchestrator:
             )  # Default to 0.5 if missing
 
             # Calculate sophisticated reward based on multiple factors
+            current_pnl = self._get_current_position_pnl(symbol)
             reward, was_correct = self._calculate_sophisticated_reward(
                 predicted_action,
                 prediction_confidence,
@@ -3369,6 +3373,7 @@ class TradingOrchestrator:
                 inference_price is not None,  # Add price prediction flag
                 symbol,  # Pass symbol for position lookup
                 None,  # Let method determine position status
+                current_position_pnl=current_pnl,
             )
 
             # Update model performance tracking
@@ -3476,10 +3481,11 @@ class TradingOrchestrator:
         has_price_prediction: bool = False,
         symbol: str = None,
         has_position: bool = None,
+        current_position_pnl: float = 0.0,
     ) -> tuple[float, bool]:
         """
         Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
-        Now considers position status when evaluating HOLD decisions
+        Now considers position status and current P&L when evaluating decisions
 
         Args:
             predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3489,6 +3495,7 @@ class TradingOrchestrator:
             has_price_prediction: Whether the model made a price prediction
             symbol: Trading symbol (for position lookup)
             has_position: Whether we currently have a position (if None, will be looked up)
+            current_position_pnl: Current unrealized P&L of open position (0.0 if no position)
 
         Returns:
             tuple: (reward, was_correct)
@@ -3500,6 +3507,9 @@ class TradingOrchestrator:
             # Determine current position status if not provided
             if has_position is None and symbol:
                 has_position = self._has_open_position(symbol)
+                # Get current position P&L if we have a position
+                if has_position and current_position_pnl == 0.0:
+                    current_position_pnl = self._get_current_position_pnl(symbol)
             elif has_position is None:
                 has_position = False
 
@@ -3518,19 +3528,37 @@ class TradingOrchestrator:
                     0, -price_change_pct
                 )  # Positive for downward movement
             elif predicted_action == "HOLD":
-                # HOLD evaluation now considers position status
+                # HOLD evaluation now considers position status AND current P&L
                 if has_position:
-                    # If we have a position, HOLD is correct if price moved favorably or stayed stable
-                    # This prevents penalizing HOLD when we're already in a profitable position
-                    if price_change_pct > 0:  # Price went up while holding - good
-                        was_correct = True
-                        directional_accuracy = price_change_pct  # Reward based on profit
-                    elif abs(price_change_pct) < movement_threshold:  # Price stable - neutral
-                        was_correct = True
-                        directional_accuracy = movement_threshold - abs(price_change_pct)
-                    else:  # Price dropped while holding - bad, but less penalty than wrong direction
-                        was_correct = False
-                        directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
+                    # If we have a position, HOLD evaluation depends on P&L and price movement
+                    if current_position_pnl > 0:  # Currently profitable position
+                        # Holding a profitable position is good if price continues favorably
+                        if price_change_pct > 0:  # Price went up while holding profitable position - excellent
+                            was_correct = True
+                            directional_accuracy = price_change_pct * 1.5  # Bonus for holding winners
+                        elif abs(price_change_pct) < movement_threshold:  # Price stable - good
+                            was_correct = True
+                            directional_accuracy = movement_threshold + (current_position_pnl / 100.0)  # Reward based on existing profit
+                        else:  # Price dropped while holding profitable position - still okay but less reward
+                            was_correct = True
+                            directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
+                    elif current_position_pnl < 0:  # Currently losing position
+                        # Holding a losing position is generally bad - should consider closing
+                        if price_change_pct > movement_threshold:  # Price recovered - good hold
+                            was_correct = True
+                            directional_accuracy = price_change_pct * 0.8  # Reduced reward for recovery
+                        else:  # Price continued down or stayed flat - bad hold
+                            was_correct = False
+                            # Penalty proportional to loss magnitude
+                            directional_accuracy = abs(current_position_pnl / 100.0) * 0.5  # Penalty for holding losers
+                    else:  # Breakeven position
+                        # Standard HOLD evaluation for breakeven positions
+                        if abs(price_change_pct) < movement_threshold:  # Price stable - good
+                            was_correct = True
+                            directional_accuracy = movement_threshold - abs(price_change_pct)
+                        else:  # Price moved significantly - missed opportunity
+                            was_correct = False
+                            directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
                 else:
                     # If we don't have a position, HOLD is correct if price stayed relatively stable
                     was_correct = abs(price_change_pct) < movement_threshold
@@ -3627,12 +3655,16 @@ class TradingOrchestrator:
 
             # Calculate reward if not provided
             if sophisticated_reward is None:
+                symbol = record.get("symbol", self.symbol)
+                current_pnl = self._get_current_position_pnl(symbol)
                 sophisticated_reward, _ = self._calculate_sophisticated_reward(
                     record.get("action", "HOLD"),
                     record.get("confidence", 0.5),
                     price_change_pct,
                     record.get("time_diff_minutes", 1.0),
                     record.get("has_price_prediction", False),
+                    symbol=symbol,
+                    current_position_pnl=current_pnl,
                 )
 
             # Train decision fusion model if it's the model being evaluated
@@ -6510,7 +6542,7 @@ class TradingOrchestrator:
             logger.error(f"Error getting combined model data for {symbol}: {e}")
             return None
 
-    def _get_current_position_pnl(self, symbol: str, current_price: float) -> float:
+    def _get_current_position_pnl(self, symbol: str, current_price: float = None) -> float:
         """Get current position P&L for the symbol"""
         try:
             if self.trading_executor and hasattr(
@@ -6518,16 +6550,22 @@ class TradingOrchestrator:
             ):
                 position = self.trading_executor.get_current_position(symbol)
                 if position:
-                    entry_price = position.get("price", 0)
-                    size = position.get("size", 0)
-                    side = position.get("side", "LONG")
+                    # If current_price is provided, calculate P&L manually
+                    if current_price is not None:
+                        entry_price = position.get("price", 0)
+                        size = position.get("size", 0)
+                        side = position.get("side", "LONG")
 
-                    if entry_price and size > 0:
-                        if side.upper() == "LONG":
-                            pnl = (current_price - entry_price) * size
-                        else:  # SHORT
-                            pnl = (entry_price - current_price) * size
-                        return pnl
+                        if entry_price and size > 0:
+                            if side.upper() == "LONG":
+                                pnl = (current_price - entry_price) * size
+                            else:  # SHORT
+                                pnl = (entry_price - current_price) * size
+                            return pnl
+                    else:
+                        # Use unrealized_pnl from position if available
+                        if position.get("size", 0) > 0:
+                            return float(position.get("unrealized_pnl", 0.0))
             return 0.0
         except Exception as e:
             logger.debug(f"Error getting position P&L for {symbol}: {e}")
@@ -6545,6 +6583,53 @@ class TradingOrchestrator:
         except Exception:
             return False
 
+
+
+    def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward for DQN to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from confidence/execution
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments (similar to CNN but tuned for DQN)
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.4
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.2
+
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Strong penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 1.0
+                    elif action in ["BUY", "SELL"]:
+                        # Strong reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.8
+
+            # Ensure reward doesn't become extreme (DQN is more sensitive to reward scale)
+            enhanced_reward = max(-2.0, min(2.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward for DQN: {e}")
+            return base_reward
+
     def _close_all_positions(self):
         """Close all open positions when clearing session"""
         try:
@@ -6889,28 +6974,35 @@ class TradingOrchestrator:
                     action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
                     dqn_action = action_mapping.get(action, 2)
 
-                    # Calculate immediate reward based on confidence and execution
-                    immediate_reward = confidence if action != "HOLD" else 0.0
+                    # Get position information for enhanced rewards
+                    has_position = self._has_open_position(symbol)
+                    position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                    # Calculate position-enhanced reward
+                    base_reward = confidence if action != "HOLD" else 0.1
+                    enhanced_reward = self._calculate_position_enhanced_reward_for_dqn(
+                        base_reward, action, position_pnl, has_position
+                    )
 
                     # Add experience to DQN
                     self.rl_agent.remember(
                         state=state,
                         action=dqn_action,
-                        reward=immediate_reward,
+                        reward=enhanced_reward,
                         next_state=state,  # Will be updated with actual outcome later
                         done=False,
                     )
 
                     models_trained.append("dqn")
                     logger.debug(
-                        f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
+                        f"🧠 Added DQN experience: {action} {symbol} (reward: {enhanced_reward:.3f}, P&L: ${position_pnl:.2f})"
                     )
 
                 except Exception as e:
                     logger.debug(f"Error training DQN on decision: {e}")
 
             # Train CNN model if available and enabled
-            if self.cnn_model and hasattr(self.cnn_model, "add_training_sample") and self.is_model_training_enabled("cnn"):
+            if self.cnn_model and hasattr(self.cnn_model, "add_training_data") and self.is_model_training_enabled("cnn"):
                 try:
                     # Create CNN input features from base_data (same as inference)
                     cnn_features = self._create_cnn_features_from_base_data(
@@ -6919,19 +7011,30 @@ class TradingOrchestrator:
 
                     # Create target based on action
                     target_mapping = {
-                        "BUY": [1, 0, 0],
-                        "SELL": [0, 1, 0],
-                        "HOLD": [0, 0, 1],
+                        "BUY": 0,  # Action indices for CNN
+                        "SELL": 1,
+                        "HOLD": 2,
                     }
-                    target = target_mapping.get(action, [0, 0, 1])
+                    target_action = target_mapping.get(action, 2)
 
-                    # Add training sample
-                    self.cnn_model.add_training_sample(
-                        cnn_features, target, weight=confidence
+                    # Get position information for enhanced rewards
+                    has_position = self._has_open_position(symbol)
+                    position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                    # Calculate base reward from confidence and add position-based enhancement
+                    base_reward = confidence if action != "HOLD" else 0.1
+
+                    # Add training data with position-based reward enhancement
+                    self.cnn_model.add_training_data(
+                        cnn_features,
+                        target_action,
+                        base_reward,
+                        position_pnl=position_pnl,
+                        has_position=has_position
                     )
 
                     models_trained.append("cnn")
-                    logger.debug(f"🔍 Added CNN training sample: {action} {symbol}")
+                    logger.debug(f"🔍 Added CNN training sample: {action} {symbol} (P&L: ${position_pnl:.2f})")
 
                 except Exception as e:
                     logger.debug(f"Error training CNN on decision: {e}")
diff --git a/test_dashboard_data_flow.py b/test_dashboard_data_flow.py
new file mode 100644
index 0000000..2d620ef
--- /dev/null
+++ b/test_dashboard_data_flow.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+"""
+Test script to debug dashboard data flow issues
+
+This script tests if the dashboard can properly retrieve and display model data.
+"""
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+from web.clean_dashboard import CleanTradingDashboard
+from core.orchestrator import TradingOrchestrator
+from core.data_provider import DataProvider
+
+def test_dashboard_data_flow():
+    """Test if dashboard can retrieve model data correctly"""
+
+    print("🧪 DASHBOARD DATA FLOW TEST")
+    print("=" * 50)
+
+    try:
+        # Initialize components
+        data_provider = DataProvider()
+        orchestrator = TradingOrchestrator(data_provider=data_provider)
+
+        print(f"✅ Orchestrator initialized")
+        print(f"   Model registry models: {list(orchestrator.model_registry.get_all_models().keys())}")
+        print(f"   Model toggle states: {list(orchestrator.model_toggle_states.keys())}")
+
+        # Initialize dashboard
+        dashboard = CleanTradingDashboard(
+            data_provider=data_provider,
+            orchestrator=orchestrator
+        )
+
+        print(f"✅ Dashboard initialized")
+
+        # Test available models
+        available_models = dashboard._get_available_models()
+        print(f"   Available models: {list(available_models.keys())}")
+
+        # Test training metrics
+        print("\n📊 Testing training metrics...")
+        toggle_states = {}
+        for model_name in available_models.keys():
+            toggle_states[model_name] = orchestrator.get_model_toggle_state(model_name)
+
+        print(f"   Toggle states: {list(toggle_states.keys())}")
+
+        metrics_data = dashboard._get_training_metrics(toggle_states)
+        print(f"   Metrics data type: {type(metrics_data)}")
+
+        if metrics_data and isinstance(metrics_data, dict):
+            print(f"   Metrics keys: {list(metrics_data.keys())}")
+            if 'loaded_models' in metrics_data:
+                loaded_models = metrics_data['loaded_models']
+                print(f"   Loaded models count: {len(loaded_models)}")
+                for model_name, model_info in loaded_models.items():
+                    print(f"     - {model_name}: active={model_info.get('active', False)}")
+            else:
+                print("   ❌ No 'loaded_models' in metrics_data!")
+        else:
+            print(f"   ❌ Invalid metrics_data: {metrics_data}")
+
+        # Test component manager formatting
+        print("\n🎨 Testing component manager...")
+        formatted_components = dashboard.component_manager.format_training_metrics(metrics_data)
+        print(f"   Formatted components type: {type(formatted_components)}")
+        print(f"   Formatted components count: {len(formatted_components) if formatted_components else 0}")
+
+        if formatted_components:
+            print("   ✅ Component manager returned formatted data")
+        else:
+            print("   ❌ Component manager returned empty data")
+
+        print("\n🚀 Dashboard data flow test completed!")
+        return True
+
+    except Exception as e:
+        print(f"❌ Test failed with error: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+if __name__ == "__main__":
+    test_dashboard_data_flow()
\ No newline at end of file
diff --git a/test_position_based_rewards.py b/test_position_based_rewards.py
new file mode 100644
index 0000000..316da43
--- /dev/null
+++ b/test_position_based_rewards.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""
+Test script for position-based reward system
+
+This script tests the enhanced reward calculations that incentivize:
+1. Holding profitable positions (let winners run)
+2. Closing losing positions (cut losses)
+3. Taking action when appropriate based on P&L
+"""
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from core.orchestrator import TradingOrchestrator
+from NN.models.enhanced_cnn import EnhancedCNN
+import numpy as np
+
+def test_position_reward_scenarios():
+    """Test various position-based reward scenarios"""
+
+    print("🧪 POSITION-BASED REWARD SYSTEM TEST")
+    print("=" * 50)
+
+    # Initialize orchestrator
+    orchestrator = TradingOrchestrator()
+
+    # Test scenarios
+    scenarios = [
+        # (action, position_pnl, has_position, price_change_pct, description)
+        ("HOLD", 50.0, True, 0.5, "Hold profitable position with continued gains"),
+        ("HOLD", 50.0, True, -0.3, "Hold profitable position with small pullback"),
+        ("HOLD", -30.0, True, 0.8, "Hold losing position that recovers"),
+        ("HOLD", -30.0, True, -0.5, "Hold losing position that continues down"),
+        ("SELL", 50.0, True, 0.0, "Close profitable position"),
+        ("SELL", -30.0, True, 0.0, "Close losing position (good)"),
+        ("BUY", 0.0, False, 1.0, "New buy position with immediate gain"),
+        ("HOLD", 0.0, False, 0.1, "Hold with no position (stable market)"),
+    ]
+
+    print("\n📊 SOPHISTICATED REWARD CALCULATION TESTS:")
+    print("-" * 80)
+
+    for i, (action, position_pnl, has_position, price_change_pct, description) in enumerate(scenarios, 1):
+        # Test sophisticated reward calculation
+        reward, was_correct = orchestrator._calculate_sophisticated_reward(
+            predicted_action=action,
+            prediction_confidence=0.8,
+            price_change_pct=price_change_pct,
+            time_diff_minutes=5.0,
+            has_price_prediction=False,
+            symbol="ETH/USDT",
+            has_position=has_position,
+            current_position_pnl=position_pnl
+        )
+
+        print(f"{i:2d}. {description}")
+        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}, Price Change: {price_change_pct:+.1f}%")
+        print(f"    Reward: {reward:+.3f}, Correct: {was_correct}")
+        print()
+
+    print("\n🧠 CNN POSITION-ENHANCED REWARD TESTS:")
+    print("-" * 80)
+
+    # Initialize CNN model
+    cnn_model = EnhancedCNN(input_shape=100, n_actions=3)
+
+    for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
+        base_reward = 0.5  # Moderate base reward
+        enhanced_reward = cnn_model._calculate_position_enhanced_reward(
+            base_reward=base_reward,
+            action=action,
+            position_pnl=position_pnl,
+            has_position=has_position
+        )
+
+        enhancement = enhanced_reward - base_reward
+        print(f"{i:2d}. {description}")
+        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}")
+        print(f"    Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f} (Δ{enhancement:+.3f})")
+        print()
+
+    print("\n🤖 DQN POSITION-ENHANCED REWARD TESTS:")
+    print("-" * 80)
+
+    for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
+        base_reward = 0.5  # Moderate base reward
+        enhanced_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(
+            base_reward=base_reward,
+            action=action,
+            position_pnl=position_pnl,
+            has_position=has_position
+        )
+
+        enhancement = enhanced_reward - base_reward
+        print(f"{i:2d}. {description}")
+        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}")
+        print(f"    Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f} (Δ{enhancement:+.3f})")
+        print()
+
+def test_reward_incentives():
+    """Test that rewards properly incentivize desired behaviors"""
+
+    print("\n🎯 REWARD INCENTIVE VALIDATION:")
+    print("-" * 50)
+
+    orchestrator = TradingOrchestrator()
+    cnn_model = EnhancedCNN(input_shape=100, n_actions=3)
+
+    # Test 1: Holding winners vs holding losers
+    print("1. HOLD action comparison:")
+
+    hold_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", 100.0, True)
+    hold_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
+
+    print(f"   Hold profitable position (+$100): {hold_winner_reward:+.3f}")
+    print(f"   Hold losing position (-$100): {hold_loser_reward:+.3f}")
+    print(f"   ✅ Incentive correct: {hold_winner_reward > hold_loser_reward}")
+
+    # Test 2: Closing losers vs closing winners
+    print("\n2. SELL action comparison:")
+
+    sell_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", 100.0, True)
+    sell_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", -100.0, True)
+
+    print(f"   Sell profitable position (+$100): {sell_winner_reward:+.3f}")
+    print(f"   Sell losing position (-$100): {sell_loser_reward:+.3f}")
+    print(f"   ✅ Incentive correct: {sell_loser_reward > sell_winner_reward}")
+
+    # Test 3: DQN reward scaling
+    print("\n3. DQN vs CNN reward scaling:")
+
+    dqn_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(0.5, "HOLD", -100.0, True)
+    cnn_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
+
+    print(f"   DQN penalty for holding loser: {dqn_reward:+.3f}")
+    print(f"   CNN penalty for holding loser: {cnn_reward:+.3f}")
+    print(f"   ✅ DQN more sensitive: {abs(dqn_reward) > abs(cnn_reward)}")
+
+def main():
+    """Run all position-based reward tests"""
+    try:
+        test_position_reward_scenarios()
+        test_reward_incentives()
+
+        print("\n🚀 POSITION-BASED REWARD SYSTEM VALIDATION COMPLETE!")
+        print("✅ System properly incentivizes:")
+        print("   • Holding profitable positions (let winners run)")
+        print("   • Closing losing positions (cut losses)")
+        print("   • Taking appropriate action based on P&L")
+        print("   • Different reward scaling for CNN vs DQN models")
+
+    except Exception as e:
+        print(f"❌ Test failed with error: {e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/web/clean_dashboard.py b/web/clean_dashboard.py
index 70cb267..83dbf99 100644
--- a/web/clean_dashboard.py
+++ b/web/clean_dashboard.py
@@ -964,6 +964,7 @@ class CleanTradingDashboard:
         )
         def update_metrics(n):
             """Update key metrics - ENHANCED with position sync monitoring"""
+            logger.debug(f"update_metrics callback triggered (n={n})")
            try:
                 # PERIODIC POSITION SYNC: Every 30 seconds, verify position sync
                 if n % 30 == 0 and n > 0:  # Skip initial load (n=0)
@@ -1102,7 +1103,14 @@ class CleanTradingDashboard:
                 # For simulation, show starting balance + session P&L
                 current_balance = self._cached_live_balance if hasattr(self, '_cached_live_balance') else self._get_initial_balance()
                 portfolio_value = current_balance + total_session_pnl  # Live balance + unrealized P&L
-                portfolio_str = f"${portfolio_value:.2f}"
+
+                # Add max position info to portfolio display
+                try:
+                    max_position_info = self._calculate_max_position_display()
+                    portfolio_str = f"${portfolio_value:.2f} | {max_position_info}"
+                except Exception as e:
+                    logger.error(f"Error calculating max position display: {e}")
+                    portfolio_str = f"${portfolio_value:.2f}"
 
                 # Profitability multiplier - get from trading executor
                 profitability_multiplier = 0.0
@@ -1352,6 +1360,11 @@ class CleanTradingDashboard:
                     logger.debug(f"Metrics data keys: {list(metrics_data.keys())}")
                     if 'loaded_models' in metrics_data:
                         logger.debug(f"Loaded models count: {len(metrics_data['loaded_models'])}")
+                        logger.debug(f"Loaded model names: {list(metrics_data['loaded_models'].keys())}")
+                    else:
+                        logger.warning("No 'loaded_models' key in metrics_data!")
+                else:
+                    logger.warning(f"Invalid metrics_data: {metrics_data}")
                 return self.component_manager.format_training_metrics(metrics_data)
             except PreventUpdate:
                 raise
@@ -1646,6 +1659,38 @@ class CleanTradingDashboard:
             logger.debug(f"Error calculating opening fee: {e}")
             return position_size_usd * 0.0006  # Fallback to 0.06%
 
+    def _calculate_max_position_display(self) -> str:
+        """Calculate and display maximum position size based on current balance and leverage"""
+        try:
+            # Get current balance
+            current_balance = self._get_live_account_balance()
+            if current_balance <= 0:
+                return "No Balance"
+
+            # Get current leverage
+            leverage = getattr(self, 'current_leverage', 50)  # Default to 50x
+
+            # Get current price for ETH/USDT
+            current_price = self._get_current_price('ETH/USDT')
+            if not current_price or current_price <= 0:
+                return "Price N/A"
+
+            # Calculate maximum position value (balance * leverage)
+            max_position_value = current_balance * leverage
+
+            # Calculate maximum ETH quantity
+            max_eth_quantity = max_position_value / current_price
+
+            # Format display
+            if max_eth_quantity >= 0.01:  # Show in ETH if >= 0.01
+                return f"${max_position_value:.1f} ({max_eth_quantity:.2f} ETH)"
+            else:
+                return f"${max_position_value:.1f} ({max_eth_quantity:.4f} ETH)"
+
+        except Exception as e:
+            logger.debug(f"Error calculating max position display: {e}")
+            return "Calc Error"
+
     def _calculate_closing_fee(self, current_price: float, quantity: float) -> float:
         """Calculate closing fee for a position at current price"""
         try:
@@ -3532,11 +3577,22 @@ class CleanTradingDashboard:
         if self.orchestrator and hasattr(self.orchestrator, 'get_model_states'):
             try:
                 model_states = self.orchestrator.get_model_states()
-                logger.debug(f"Retrieved model states from orchestrator: {model_states}")
+                logger.debug(f"Retrieved model states from orchestrator: {list(model_states.keys()) if model_states else 'None'}")
             except Exception as e:
                 logger.error(f"Error getting model states from orchestrator: {e}")
                 model_states = None
 
+        # Also try to get orchestrator statistics for debugging
+        if self.orchestrator:
+            try:
+                all_stats = self.orchestrator.get_model_statistics()
+                if all_stats:
+                    logger.debug(f"Available orchestrator statistics: {list(all_stats.keys())}")
+                else:
+                    logger.debug("No orchestrator statistics available")
+            except Exception as e:
+                logger.debug(f"Error getting orchestrator statistics: {e}")
+
         # Fallback if orchestrator not available or returns None
         if model_states is None:
             logger.warning("No model states available from orchestrator, using fallback")
@@ -3549,6 +3605,26 @@ class CleanTradingDashboard:
                 'decision': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
             }
 
+        # Create mapping for model states to handle both old and new model names
+        if model_states and self.orchestrator:
+            # Map new registry names to old dashboard names for compatibility
+            registry_to_dashboard_mapping = {
+                'dqn_agent': 'dqn',
+                'enhanced_cnn': 'cnn',
+                'cob_rl_model': 'cob_rl',
+                'decision_fusion': 'decision_fusion',
+                'transformer': 'transformer'
+            }
+
+            # Copy states from new names to old names if they exist
+            for registry_name, dashboard_name in registry_to_dashboard_mapping.items():
+                if registry_name in model_states and dashboard_name not in model_states:
+                    model_states[dashboard_name] = model_states[registry_name]
+                    logger.debug(f"Mapped model state {registry_name} -> {dashboard_name}")
+                elif dashboard_name not in model_states:
+                    # Ensure we have a state for the dashboard name
+                    model_states[dashboard_name] = {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
+
         # Get latest predictions from all models
         latest_predictions = self._get_latest_model_predictions()
         cnn_prediction = self._get_cnn_pivot_prediction()
@@ -3598,6 +3674,23 @@ class CleanTradingDashboard:
                 "transformer": {"inference_enabled": True, "training_enabled": True}
             }
 
+        # Create mapping for backward compatibility between old dashboard names and new registry names
+        model_name_mapping = {
+            'dqn': 'dqn_agent',
+            'cnn': 'enhanced_cnn',
+            'cob_rl': 'cob_rl_model',
+            'decision_fusion': 'decision_fusion',
+            'transformer': 'transformer'
+        }
+
+        # Ensure we have toggle states for the old names used by the dashboard
+        for old_name, new_name in model_name_mapping.items():
+            if old_name not in toggle_states and new_name in toggle_states:
+                toggle_states[old_name] = toggle_states[new_name]
+            elif old_name not in toggle_states:
+                # Default state if neither old nor new name exists
+                toggle_states[old_name] = {"inference_enabled": True, "training_enabled": True}
+
         # Helper function to safely calculate improvement percentage
         def safe_improvement_calc(initial, current, default_improvement=0.0):
             try:
@@ -3705,8 +3798,8 @@ class CleanTradingDashboard:
                 last_confidence = 0.68
                 last_timestamp = datetime.now().strftime('%H:%M:%S')
 
-            # Get real DQN statistics from orchestrator
-            dqn_stats = orchestrator_stats.get('dqn_agent')
+            # Get real DQN statistics from orchestrator (try both old and new names)
+            dqn_stats = orchestrator_stats.get('dqn_agent') or orchestrator_stats.get('dqn')
             dqn_current_loss = dqn_stats.current_loss if dqn_stats else None
             dqn_best_loss = dqn_stats.best_loss if dqn_stats else None
             dqn_accuracy = dqn_stats.accuracy if dqn_stats else None
@@ -3786,8 +3879,8 @@ class CleanTradingDashboard:
             cnn_state = model_states.get('cnn', {})
             cnn_timing = get_model_timing_info('CNN')
 
-            # Get real CNN statistics from orchestrator
-            cnn_stats = orchestrator_stats.get('enhanced_cnn')
+            # Get real CNN statistics from orchestrator (try both old and new names)
+            cnn_stats = orchestrator_stats.get('enhanced_cnn') or orchestrator_stats.get('cnn')
             cnn_active = cnn_stats is not None
 
             # Get latest CNN prediction from orchestrator statistics
@@ -4153,12 +4246,15 @@ class CleanTradingDashboard:
 
             # DEBUG: Log what we're returning
             models_count = len(metrics.get('loaded_models', {}))
-            logger.info(f"Training metrics being returned: {models_count} models loaded")
+            logger.debug(f"Training metrics being returned: {models_count} models loaded")
             if models_count == 0:
                 logger.warning("No models in loaded_models!")
                 logger.warning(f"Metrics keys: {list(metrics.keys())}")
-            for model_name, model_info in metrics.get('loaded_models', {}).items():
-                logger.info(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")
+                logger.warning(f"Model states available: {list(model_states.keys()) if model_states else 'None'}")
+                logger.warning(f"Toggle states available: {list(toggle_states.keys()) if toggle_states else 'None'}")
+            else:
+                for model_name, model_info in metrics.get('loaded_models', {}).items():
+                    logger.debug(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")
 
             return metrics