PnL in reward, show leveraged power in dash (broken)

Dobromir Popov
2025-07-29 17:42:00 +03:00
parent d35530a9e9
commit 3a532a1220
5 changed files with 553 additions and 49 deletions

View File

@@ -3,6 +3,7 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 import os
+import time
 import logging
 import torch.nn.functional as F
 from typing import List, Tuple, Dict, Any, Optional, Union
@@ -652,20 +653,30 @@ class EnhancedCNN(nn.Module):
             'weighted_strength': 0.0
         }

-    def add_training_data(self, state, action, reward):
+    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
         """
-        Add training data to the model's training buffer
+        Add training data to the model's training buffer with position-based reward enhancement

         Args:
             state: Input state
             action: Action taken
-            reward: Reward received
+            reward: Base reward received
+            position_pnl: Current position P&L (0.0 if no position)
+            has_position: Whether we currently have an open position
         """
         try:
+            # Enhance reward based on position status
+            enhanced_reward = self._calculate_position_enhanced_reward(
+                reward, action, position_pnl, has_position
+            )
+
             self.training_data.append({
                 'state': state,
                 'action': action,
-                'reward': reward,
+                'reward': enhanced_reward,
+                'base_reward': reward,  # Keep original reward for analysis
+                'position_pnl': position_pnl,
+                'has_position': has_position,
                 'timestamp': time.time()
             })
@@ -675,6 +686,51 @@ class EnhancedCNN(nn.Module):
         except Exception as e:
             logger.error(f"Error adding training data: {e}")

+    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from price prediction accuracy
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to a reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.5
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.3
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 0.8
+                    elif action in ["BUY", "SELL"]:
+                        # Reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.6
+
+            # Ensure reward doesn't become extreme
+            enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward: {e}")
+            return base_reward
+
     def save(self, path):
         """Save model weights and architecture"""

View File

@@ -3267,12 +3267,15 @@ class TradingOrchestrator:
                 avg_loss = model_stats.average_loss if model_stats else None

                 # Calculate reward for logging
+                current_pnl = self._get_current_position_pnl(self.symbol)
                 reward, _ = self._calculate_sophisticated_reward(
                     predicted_action,
                     predicted_confidence,
                     actual_price_change_pct,
                     time_diff_seconds / 60,  # Convert to minutes
                     has_price_prediction=predicted_price is not None,
+                    symbol=self.symbol,
+                    current_position_pnl=current_pnl,
                 )

                 # Enhanced logging with detailed information
@@ -3361,6 +3364,7 @@ class TradingOrchestrator:
            )  # Default to 0.5 if missing

            # Calculate sophisticated reward based on multiple factors
+           current_pnl = self._get_current_position_pnl(symbol)
            reward, was_correct = self._calculate_sophisticated_reward(
                predicted_action,
                prediction_confidence,
@@ -3369,6 +3373,7 @@ class TradingOrchestrator:
                inference_price is not None,  # Add price prediction flag
                symbol,  # Pass symbol for position lookup
                None,  # Let method determine position status
+               current_position_pnl=current_pnl,
            )

            # Update model performance tracking
@@ -3476,10 +3481,11 @@ class TradingOrchestrator:
        has_price_prediction: bool = False,
        symbol: str = None,
        has_position: bool = None,
+       current_position_pnl: float = 0.0,
    ) -> tuple[float, bool]:
        """
        Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
-       Now considers position status when evaluating HOLD decisions
+       Now considers position status and current P&L when evaluating decisions

        Args:
            predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3489,6 +3495,7 @@ class TradingOrchestrator:
            has_price_prediction: Whether the model made a price prediction
            symbol: Trading symbol (for position lookup)
            has_position: Whether we currently have a position (if None, will be looked up)
+           current_position_pnl: Current unrealized P&L of open position (0.0 if no position)

        Returns:
            tuple: (reward, was_correct)
@@ -3500,6 +3507,9 @@ class TradingOrchestrator:
            # Determine current position status if not provided
            if has_position is None and symbol:
                has_position = self._has_open_position(symbol)
+               # Get current position P&L if we have a position
+               if has_position and current_position_pnl == 0.0:
+                   current_position_pnl = self._get_current_position_pnl(symbol)
            elif has_position is None:
                has_position = False
@@ -3518,19 +3528,37 @@ class TradingOrchestrator:
                    0, -price_change_pct
                )  # Positive for downward movement
            elif predicted_action == "HOLD":
-               # HOLD evaluation now considers position status
+               # HOLD evaluation now considers position status AND current P&L
                if has_position:
-                   # If we have a position, HOLD is correct if price moved favorably or stayed stable
-                   # This prevents penalizing HOLD when we're already in a profitable position
-                   if price_change_pct > 0:  # Price went up while holding - good
-                       was_correct = True
-                       directional_accuracy = price_change_pct  # Reward based on profit
-                   elif abs(price_change_pct) < movement_threshold:  # Price stable - neutral
-                       was_correct = True
-                       directional_accuracy = movement_threshold - abs(price_change_pct)
-                   else:  # Price dropped while holding - bad, but less penalty than wrong direction
-                       was_correct = False
-                       directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
+                   # If we have a position, HOLD evaluation depends on P&L and price movement
+                   if current_position_pnl > 0:  # Currently profitable position
+                       # Holding a profitable position is good if price continues favorably
+                       if price_change_pct > 0:  # Price went up while holding profitable position - excellent
+                           was_correct = True
+                           directional_accuracy = price_change_pct * 1.5  # Bonus for holding winners
+                       elif abs(price_change_pct) < movement_threshold:  # Price stable - good
+                           was_correct = True
+                           directional_accuracy = movement_threshold + (current_position_pnl / 100.0)  # Reward based on existing profit
+                       else:  # Price dropped while holding profitable position - still okay but less reward
+                           was_correct = True
+                           directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
+                   elif current_position_pnl < 0:  # Currently losing position
+                       # Holding a losing position is generally bad - should consider closing
+                       if price_change_pct > movement_threshold:  # Price recovered - good hold
+                           was_correct = True
+                           directional_accuracy = price_change_pct * 0.8  # Reduced reward for recovery
+                       else:  # Price continued down or stayed flat - bad hold
+                           was_correct = False
+                           # Penalty proportional to loss magnitude
+                           directional_accuracy = abs(current_position_pnl / 100.0) * 0.5  # Penalty for holding losers
+                   else:  # Breakeven position
+                       # Standard HOLD evaluation for breakeven positions
+                       if abs(price_change_pct) < movement_threshold:  # Price stable - good
+                           was_correct = True
+                           directional_accuracy = movement_threshold - abs(price_change_pct)
+                       else:  # Price moved significantly - missed opportunity
+                           was_correct = False
+                           directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
                else:
                    # If we don't have a position, HOLD is correct if price stayed relatively stable
                    was_correct = abs(price_change_pct) < movement_threshold
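Note: a short sketch of what the new HOLD branch produces for a few cases, with assumed inputs (movement_threshold = 0.1, P&L in USD); only the was_correct / directional_accuracy pair is shown, the rest of the reward calculation is unchanged:

movement_threshold = 0.1  # assumed value for illustration
# HOLD, +$40 open profit, price up another 0.5%:
print(True, 0.5 * 1.5)                        # directional_accuracy = 0.75 (bonus for holding a winner)
# HOLD, -$40 open loss, price keeps falling:
print(False, abs(-40 / 100.0) * 0.5)          # directional_accuracy = 0.20, but scored as incorrect
# HOLD, breakeven position, price moves only -0.05%:
print(True, movement_threshold - abs(-0.05))  # directional_accuracy = 0.05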
@@ -3627,12 +3655,16 @@ class TradingOrchestrator:
            # Calculate reward if not provided
            if sophisticated_reward is None:
+               symbol = record.get("symbol", self.symbol)
+               current_pnl = self._get_current_position_pnl(symbol)
                sophisticated_reward, _ = self._calculate_sophisticated_reward(
                    record.get("action", "HOLD"),
                    record.get("confidence", 0.5),
                    price_change_pct,
                    record.get("time_diff_minutes", 1.0),
                    record.get("has_price_prediction", False),
+                   symbol=symbol,
+                   current_position_pnl=current_pnl,
                )

            # Train decision fusion model if it's the model being evaluated
@@ -6510,7 +6542,7 @@ class TradingOrchestrator:
            logger.error(f"Error getting combined model data for {symbol}: {e}")
            return None

-    def _get_current_position_pnl(self, symbol: str, current_price: float) -> float:
+    def _get_current_position_pnl(self, symbol: str, current_price: float = None) -> float:
        """Get current position P&L for the symbol"""
        try:
            if self.trading_executor and hasattr(
@@ -6518,16 +6550,22 @@ class TradingOrchestrator:
            ):
                position = self.trading_executor.get_current_position(symbol)
                if position:
-                   entry_price = position.get("price", 0)
-                   size = position.get("size", 0)
-                   side = position.get("side", "LONG")
+                   # If current_price is provided, calculate P&L manually
+                   if current_price is not None:
+                       entry_price = position.get("price", 0)
+                       size = position.get("size", 0)
+                       side = position.get("side", "LONG")
                        if entry_price and size > 0:
                            if side.upper() == "LONG":
                                pnl = (current_price - entry_price) * size
                            else:  # SHORT
                                pnl = (entry_price - current_price) * size
                            return pnl
+                   else:
+                       # Use unrealized_pnl from position if available
+                       if position.get("size", 0) > 0:
+                           return float(position.get("unrealized_pnl", 0.0))
            return 0.0
        except Exception as e:
            logger.debug(f"Error getting position P&L for {symbol}: {e}")
@@ -6545,6 +6583,53 @@ class TradingOrchestrator:
        except Exception:
            return False

+    def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward for DQN to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from confidence/execution
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments (similar to CNN but tuned for DQN)
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to a reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.4
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.2
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Strong penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 1.0
+                    elif action in ["BUY", "SELL"]:
+                        # Strong reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.8
+
+            # Ensure reward doesn't become extreme (DQN is more sensitive to reward scale)
+            enhanced_reward = max(-2.0, min(2.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward for DQN: {e}")
+            return base_reward
+
    def _close_all_positions(self):
        """Close all open positions when clearing session"""
        try:
@@ -6889,28 +6974,35 @@ class TradingOrchestrator:
                    action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
                    dqn_action = action_mapping.get(action, 2)

-                   # Calculate immediate reward based on confidence and execution
-                   immediate_reward = confidence if action != "HOLD" else 0.0
+                   # Get position information for enhanced rewards
+                   has_position = self._has_open_position(symbol)
+                   position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                   # Calculate position-enhanced reward
+                   base_reward = confidence if action != "HOLD" else 0.1
+                   enhanced_reward = self._calculate_position_enhanced_reward_for_dqn(
+                       base_reward, action, position_pnl, has_position
+                   )

                    # Add experience to DQN
                    self.rl_agent.remember(
                        state=state,
                        action=dqn_action,
-                       reward=immediate_reward,
+                       reward=enhanced_reward,
                        next_state=state,  # Will be updated with actual outcome later
                        done=False,
                    )

                    models_trained.append("dqn")
                    logger.debug(
-                       f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
+                       f"🧠 Added DQN experience: {action} {symbol} (reward: {enhanced_reward:.3f}, P&L: ${position_pnl:.2f})"
                    )

                except Exception as e:
                    logger.debug(f"Error training DQN on decision: {e}")

            # Train CNN model if available and enabled
-           if self.cnn_model and hasattr(self.cnn_model, "add_training_sample") and self.is_model_training_enabled("cnn"):
+           if self.cnn_model and hasattr(self.cnn_model, "add_training_data") and self.is_model_training_enabled("cnn"):
                try:
                    # Create CNN input features from base_data (same as inference)
                    cnn_features = self._create_cnn_features_from_base_data(
@@ -6919,19 +7011,30 @@ class TradingOrchestrator:
                    # Create target based on action
                    target_mapping = {
-                       "BUY": [1, 0, 0],
-                       "SELL": [0, 1, 0],
-                       "HOLD": [0, 0, 1],
+                       "BUY": 0,  # Action indices for CNN
+                       "SELL": 1,
+                       "HOLD": 2,
                    }
-                   target = target_mapping.get(action, [0, 0, 1])
+                   target_action = target_mapping.get(action, 2)

-                   # Add training sample
-                   self.cnn_model.add_training_sample(
-                       cnn_features, target, weight=confidence
+                   # Get position information for enhanced rewards
+                   has_position = self._has_open_position(symbol)
+                   position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                   # Calculate base reward from confidence and add position-based enhancement
+                   base_reward = confidence if action != "HOLD" else 0.1
+
+                   # Add training data with position-based reward enhancement
+                   self.cnn_model.add_training_data(
+                       cnn_features,
+                       target_action,
+                       base_reward,
+                       position_pnl=position_pnl,
+                       has_position=has_position
                    )

                    models_trained.append("cnn")
-                   logger.debug(f"🔍 Added CNN training sample: {action} {symbol}")
+                   logger.debug(f"🔍 Added CNN training sample: {action} {symbol} (P&L: ${position_pnl:.2f})")

                except Exception as e:
                    logger.debug(f"Error training CNN on decision: {e}")

View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Test script to debug dashboard data flow issues

This script tests if the dashboard can properly retrieve and display model data.
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import logging
logging.basicConfig(level=logging.DEBUG)

from web.clean_dashboard import CleanTradingDashboard
from core.orchestrator import TradingOrchestrator
from core.data_provider import DataProvider

def test_dashboard_data_flow():
    """Test if dashboard can retrieve model data correctly"""
    print("🧪 DASHBOARD DATA FLOW TEST")
    print("=" * 50)

    try:
        # Initialize components
        data_provider = DataProvider()
        orchestrator = TradingOrchestrator(data_provider=data_provider)

        print(f"✅ Orchestrator initialized")
        print(f"   Model registry models: {list(orchestrator.model_registry.get_all_models().keys())}")
        print(f"   Model toggle states: {list(orchestrator.model_toggle_states.keys())}")

        # Initialize dashboard
        dashboard = CleanTradingDashboard(
            data_provider=data_provider,
            orchestrator=orchestrator
        )
        print(f"✅ Dashboard initialized")

        # Test available models
        available_models = dashboard._get_available_models()
        print(f"   Available models: {list(available_models.keys())}")

        # Test training metrics
        print("\n📊 Testing training metrics...")
        toggle_states = {}
        for model_name in available_models.keys():
            toggle_states[model_name] = orchestrator.get_model_toggle_state(model_name)

        print(f"   Toggle states: {list(toggle_states.keys())}")

        metrics_data = dashboard._get_training_metrics(toggle_states)
        print(f"   Metrics data type: {type(metrics_data)}")

        if metrics_data and isinstance(metrics_data, dict):
            print(f"   Metrics keys: {list(metrics_data.keys())}")

            if 'loaded_models' in metrics_data:
                loaded_models = metrics_data['loaded_models']
                print(f"   Loaded models count: {len(loaded_models)}")

                for model_name, model_info in loaded_models.items():
                    print(f"     - {model_name}: active={model_info.get('active', False)}")
            else:
                print("   ❌ No 'loaded_models' in metrics_data!")
        else:
            print(f"   ❌ Invalid metrics_data: {metrics_data}")

        # Test component manager formatting
        print("\n🎨 Testing component manager...")
        formatted_components = dashboard.component_manager.format_training_metrics(metrics_data)
        print(f"   Formatted components type: {type(formatted_components)}")
        print(f"   Formatted components count: {len(formatted_components) if formatted_components else 0}")

        if formatted_components:
            print("   ✅ Component manager returned formatted data")
        else:
            print("   ❌ Component manager returned empty data")

        print("\n🚀 Dashboard data flow test completed!")
        return True

    except Exception as e:
        print(f"❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    test_dashboard_data_flow()

View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
Test script for position-based reward system

This script tests the enhanced reward calculations that incentivize:
1. Holding profitable positions (let winners run)
2. Closing losing positions (cut losses)
3. Taking action when appropriate based on P&L
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from core.orchestrator import TradingOrchestrator
from NN.models.enhanced_cnn import EnhancedCNN
import numpy as np

def test_position_reward_scenarios():
    """Test various position-based reward scenarios"""
    print("🧪 POSITION-BASED REWARD SYSTEM TEST")
    print("=" * 50)

    # Initialize orchestrator
    orchestrator = TradingOrchestrator()

    # Test scenarios
    scenarios = [
        # (action, position_pnl, has_position, price_change_pct, description)
        ("HOLD", 50.0, True, 0.5, "Hold profitable position with continued gains"),
        ("HOLD", 50.0, True, -0.3, "Hold profitable position with small pullback"),
        ("HOLD", -30.0, True, 0.8, "Hold losing position that recovers"),
        ("HOLD", -30.0, True, -0.5, "Hold losing position that continues down"),
        ("SELL", 50.0, True, 0.0, "Close profitable position"),
        ("SELL", -30.0, True, 0.0, "Close losing position (good)"),
        ("BUY", 0.0, False, 1.0, "New buy position with immediate gain"),
        ("HOLD", 0.0, False, 0.1, "Hold with no position (stable market)"),
    ]

    print("\n📊 SOPHISTICATED REWARD CALCULATION TESTS:")
    print("-" * 80)

    for i, (action, position_pnl, has_position, price_change_pct, description) in enumerate(scenarios, 1):
        # Test sophisticated reward calculation
        reward, was_correct = orchestrator._calculate_sophisticated_reward(
            predicted_action=action,
            prediction_confidence=0.8,
            price_change_pct=price_change_pct,
            time_diff_minutes=5.0,
            has_price_prediction=False,
            symbol="ETH/USDT",
            has_position=has_position,
            current_position_pnl=position_pnl
        )

        print(f"{i:2d}. {description}")
        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}, Price Change: {price_change_pct:+.1f}%")
        print(f"    Reward: {reward:+.3f}, Correct: {was_correct}")
        print()

    print("\n🧠 CNN POSITION-ENHANCED REWARD TESTS:")
    print("-" * 80)

    # Initialize CNN model
    cnn_model = EnhancedCNN(input_shape=100, n_actions=3)

    for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
        base_reward = 0.5  # Moderate base reward
        enhanced_reward = cnn_model._calculate_position_enhanced_reward(
            base_reward=base_reward,
            action=action,
            position_pnl=position_pnl,
            has_position=has_position
        )

        enhancement = enhanced_reward - base_reward
        print(f"{i:2d}. {description}")
        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}")
        print(f"    Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f} (Δ{enhancement:+.3f})")
        print()

    print("\n🤖 DQN POSITION-ENHANCED REWARD TESTS:")
    print("-" * 80)

    for i, (action, position_pnl, has_position, _, description) in enumerate(scenarios, 1):
        base_reward = 0.5  # Moderate base reward
        enhanced_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(
            base_reward=base_reward,
            action=action,
            position_pnl=position_pnl,
            has_position=has_position
        )

        enhancement = enhanced_reward - base_reward
        print(f"{i:2d}. {description}")
        print(f"    Action: {action}, P&L: ${position_pnl:+.1f}")
        print(f"    Base Reward: {base_reward:+.3f} → Enhanced: {enhanced_reward:+.3f} (Δ{enhancement:+.3f})")
        print()

def test_reward_incentives():
    """Test that rewards properly incentivize desired behaviors"""
    print("\n🎯 REWARD INCENTIVE VALIDATION:")
    print("-" * 50)

    orchestrator = TradingOrchestrator()
    cnn_model = EnhancedCNN(input_shape=100, n_actions=3)

    # Test 1: Holding winners vs holding losers
    print("1. HOLD action comparison:")
    hold_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", 100.0, True)
    hold_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
    print(f"   Hold profitable position (+$100): {hold_winner_reward:+.3f}")
    print(f"   Hold losing position (-$100): {hold_loser_reward:+.3f}")
    print(f"   ✅ Incentive correct: {hold_winner_reward > hold_loser_reward}")

    # Test 2: Closing losers vs closing winners
    print("\n2. SELL action comparison:")
    sell_winner_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", 100.0, True)
    sell_loser_reward = cnn_model._calculate_position_enhanced_reward(0.5, "SELL", -100.0, True)
    print(f"   Sell profitable position (+$100): {sell_winner_reward:+.3f}")
    print(f"   Sell losing position (-$100): {sell_loser_reward:+.3f}")
    print(f"   ✅ Incentive correct: {sell_loser_reward > sell_winner_reward}")

    # Test 3: DQN reward scaling
    print("\n3. DQN vs CNN reward scaling:")
    dqn_reward = orchestrator._calculate_position_enhanced_reward_for_dqn(0.5, "HOLD", -100.0, True)
    cnn_reward = cnn_model._calculate_position_enhanced_reward(0.5, "HOLD", -100.0, True)
    print(f"   DQN penalty for holding loser: {dqn_reward:+.3f}")
    print(f"   CNN penalty for holding loser: {cnn_reward:+.3f}")
    print(f"   ✅ DQN more sensitive: {abs(dqn_reward) > abs(cnn_reward)}")

def main():
    """Run all position-based reward tests"""
    try:
        test_position_reward_scenarios()
        test_reward_incentives()

        print("\n🚀 POSITION-BASED REWARD SYSTEM VALIDATION COMPLETE!")
        print("✅ System properly incentivizes:")
        print("   • Holding profitable positions (let winners run)")
        print("   • Closing losing positions (cut losses)")
        print("   • Taking appropriate action based on P&L")
        print("   • Different reward scaling for CNN vs DQN models")

    except Exception as e:
        print(f"❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()
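Note: the incentive ordering the script asserts can also be checked by hand from the constants in this commit (a sketch; no model or orchestrator needed):

# Hold a -$100 loser with base reward 0.5 (pnl_factor = 1.0):
cnn_hold_loser = max(-5.0, min(5.0, 0.5 - 1.0 * 0.8))   # -0.30, CNN clamp is [-5, 5]
dqn_hold_loser = max(-2.0, min(2.0, 0.5 - 1.0 * 1.0))   # -0.50, DQN clamp is [-2, 2]
# Hold a +$100 winner with the CNN shaping:
cnn_hold_winner = 0.5 + 1.0 * 0.5                        # +1.00
print(cnn_hold_winner > cnn_hold_loser)                  # True: winners preferred over losers
print(abs(dqn_hold_loser) > abs(cnn_hold_loser))         # True: DQN penalizes holding losers harder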

View File

@@ -964,6 +964,7 @@ class CleanTradingDashboard:
        )
        def update_metrics(n):
            """Update key metrics - ENHANCED with position sync monitoring"""
+           logger.debug(f"update_metrics callback triggered (n={n})")
            try:
                # PERIODIC POSITION SYNC: Every 30 seconds, verify position sync
                if n % 30 == 0 and n > 0:  # Skip initial load (n=0)
@@ -1102,7 +1103,14 @@ class CleanTradingDashboard:
                    # For simulation, show starting balance + session P&L
                    current_balance = self._cached_live_balance if hasattr(self, '_cached_live_balance') else self._get_initial_balance()
                    portfolio_value = current_balance + total_session_pnl  # Live balance + unrealized P&L
-                   portfolio_str = f"${portfolio_value:.2f}"
+
+                   # Add max position info to portfolio display
+                   try:
+                       max_position_info = self._calculate_max_position_display()
+                       portfolio_str = f"${portfolio_value:.2f} | {max_position_info}"
+                   except Exception as e:
+                       logger.error(f"Error calculating max position display: {e}")
+                       portfolio_str = f"${portfolio_value:.2f}"

                # Profitability multiplier - get from trading executor
                profitability_multiplier = 0.0
@@ -1352,6 +1360,11 @@ class CleanTradingDashboard:
                    logger.debug(f"Metrics data keys: {list(metrics_data.keys())}")
                    if 'loaded_models' in metrics_data:
                        logger.debug(f"Loaded models count: {len(metrics_data['loaded_models'])}")
+                       logger.debug(f"Loaded model names: {list(metrics_data['loaded_models'].keys())}")
+                   else:
+                       logger.warning("No 'loaded_models' key in metrics_data!")
+               else:
+                   logger.warning(f"Invalid metrics_data: {metrics_data}")
                return self.component_manager.format_training_metrics(metrics_data)
            except PreventUpdate:
                raise
@@ -1646,6 +1659,38 @@ class CleanTradingDashboard:
            logger.debug(f"Error calculating opening fee: {e}")
            return position_size_usd * 0.0006  # Fallback to 0.06%

+    def _calculate_max_position_display(self) -> str:
+        """Calculate and display maximum position size based on current balance and leverage"""
+        try:
+            # Get current balance
+            current_balance = self._get_live_account_balance()
+            if current_balance <= 0:
+                return "No Balance"
+
+            # Get current leverage
+            leverage = getattr(self, 'current_leverage', 50)  # Default to 50x
+
+            # Get current price for ETH/USDT
+            current_price = self._get_current_price('ETH/USDT')
+            if not current_price or current_price <= 0:
+                return "Price N/A"
+
+            # Calculate maximum position value (balance * leverage)
+            max_position_value = current_balance * leverage
+
+            # Calculate maximum ETH quantity
+            max_eth_quantity = max_position_value / current_price
+
+            # Format display
+            if max_eth_quantity >= 0.01:  # Show in ETH if >= 0.01
+                return f"${max_position_value:.1f} ({max_eth_quantity:.2f} ETH)"
+            else:
+                return f"${max_position_value:.1f} ({max_eth_quantity:.4f} ETH)"
+
+        except Exception as e:
+            logger.debug(f"Error calculating max position display: {e}")
+            return "Calc Error"
+
    def _calculate_closing_fee(self, current_price: float, quantity: float) -> float:
        """Calculate closing fee for a position at current price"""
        try:
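Note: the "leveraged power" figure added to the portfolio string is simply balance * leverage; a worked example with assumed numbers (a $100 balance, the 50x default, ETH at $2,500):

balance = 100.0                                    # assumed live account balance in USD
leverage = 50                                      # default used by the method above
eth_price = 2500.0                                 # assumed ETH/USDT price
max_position_value = balance * leverage            # 5000.0 USD of buying power
max_eth_quantity = max_position_value / eth_price  # 2.0 ETH
print(f"${max_position_value:.1f} ({max_eth_quantity:.2f} ETH)")  # $5000.0 (2.00 ETH)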
@@ -3532,11 +3577,22 @@ class CleanTradingDashboard:
        if self.orchestrator and hasattr(self.orchestrator, 'get_model_states'):
            try:
                model_states = self.orchestrator.get_model_states()
-               logger.debug(f"Retrieved model states from orchestrator: {model_states}")
+               logger.debug(f"Retrieved model states from orchestrator: {list(model_states.keys()) if model_states else 'None'}")
            except Exception as e:
                logger.error(f"Error getting model states from orchestrator: {e}")
                model_states = None

+       # Also try to get orchestrator statistics for debugging
+       if self.orchestrator:
+           try:
+               all_stats = self.orchestrator.get_model_statistics()
+               if all_stats:
+                   logger.debug(f"Available orchestrator statistics: {list(all_stats.keys())}")
+               else:
+                   logger.debug("No orchestrator statistics available")
+           except Exception as e:
+               logger.debug(f"Error getting orchestrator statistics: {e}")
+
        # Fallback if orchestrator not available or returns None
        if model_states is None:
            logger.warning("No model states available from orchestrator, using fallback")
@@ -3549,6 +3605,26 @@ class CleanTradingDashboard:
                'decision': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
            }

+       # Create mapping for model states to handle both old and new model names
+       if model_states and self.orchestrator:
+           # Map new registry names to old dashboard names for compatibility
+           registry_to_dashboard_mapping = {
+               'dqn_agent': 'dqn',
+               'enhanced_cnn': 'cnn',
+               'cob_rl_model': 'cob_rl',
+               'decision_fusion': 'decision_fusion',
+               'transformer': 'transformer'
+           }
+
+           # Copy states from new names to old names if they exist
+           for registry_name, dashboard_name in registry_to_dashboard_mapping.items():
+               if registry_name in model_states and dashboard_name not in model_states:
+                   model_states[dashboard_name] = model_states[registry_name]
+                   logger.debug(f"Mapped model state {registry_name} -> {dashboard_name}")
+               elif dashboard_name not in model_states:
+                   # Ensure we have a state for the dashboard name
+                   model_states[dashboard_name] = {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
+
        # Get latest predictions from all models
        latest_predictions = self._get_latest_model_predictions()
        cnn_prediction = self._get_cnn_pivot_prediction()
@@ -3598,6 +3674,23 @@ class CleanTradingDashboard:
                "transformer": {"inference_enabled": True, "training_enabled": True}
            }

+       # Create mapping for backward compatibility between old dashboard names and new registry names
+       model_name_mapping = {
+           'dqn': 'dqn_agent',
+           'cnn': 'enhanced_cnn',
+           'cob_rl': 'cob_rl_model',
+           'decision_fusion': 'decision_fusion',
+           'transformer': 'transformer'
+       }
+
+       # Ensure we have toggle states for the old names used by the dashboard
+       for old_name, new_name in model_name_mapping.items():
+           if old_name not in toggle_states and new_name in toggle_states:
+               toggle_states[old_name] = toggle_states[new_name]
+           elif old_name not in toggle_states:
+               # Default state if neither old nor new name exists
+               toggle_states[old_name] = {"inference_enabled": True, "training_enabled": True}
+
        # Helper function to safely calculate improvement percentage
        def safe_improvement_calc(initial, current, default_improvement=0.0):
            try:
@@ -3705,8 +3798,8 @@ class CleanTradingDashboard:
            last_confidence = 0.68
            last_timestamp = datetime.now().strftime('%H:%M:%S')

-       # Get real DQN statistics from orchestrator
-       dqn_stats = orchestrator_stats.get('dqn_agent')
+       # Get real DQN statistics from orchestrator (try both old and new names)
+       dqn_stats = orchestrator_stats.get('dqn_agent') or orchestrator_stats.get('dqn')
        dqn_current_loss = dqn_stats.current_loss if dqn_stats else None
        dqn_best_loss = dqn_stats.best_loss if dqn_stats else None
        dqn_accuracy = dqn_stats.accuracy if dqn_stats else None
@@ -3786,8 +3879,8 @@ class CleanTradingDashboard:
        cnn_state = model_states.get('cnn', {})
        cnn_timing = get_model_timing_info('CNN')

-       # Get real CNN statistics from orchestrator
-       cnn_stats = orchestrator_stats.get('enhanced_cnn')
+       # Get real CNN statistics from orchestrator (try both old and new names)
+       cnn_stats = orchestrator_stats.get('enhanced_cnn') or orchestrator_stats.get('cnn')
        cnn_active = cnn_stats is not None

        # Get latest CNN prediction from orchestrator statistics
@@ -4153,12 +4246,15 @@ class CleanTradingDashboard:
        # DEBUG: Log what we're returning
        models_count = len(metrics.get('loaded_models', {}))
-       logger.info(f"Training metrics being returned: {models_count} models loaded")
+       logger.debug(f"Training metrics being returned: {models_count} models loaded")
        if models_count == 0:
            logger.warning("No models in loaded_models!")
            logger.warning(f"Metrics keys: {list(metrics.keys())}")
-       for model_name, model_info in metrics.get('loaded_models', {}).items():
-           logger.info(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")
+           logger.warning(f"Model states available: {list(model_states.keys()) if model_states else 'None'}")
+           logger.warning(f"Toggle states available: {list(toggle_states.keys()) if toggle_states else 'None'}")
+       else:
+           for model_name, model_info in metrics.get('loaded_models', {}).items():
+               logger.debug(f"Model {model_name}: active={model_info.get('active', False)}, checkpoint_loaded={model_info.get('checkpoint_loaded', False)}")

        return metrics