PnL in reward, show leveraged power in dash (broken)

Dobromir Popov
2025-07-29 17:42:00 +03:00
parent d35530a9e9
commit 3a532a1220
5 changed files with 553 additions and 49 deletions

@@ -3267,12 +3267,15 @@ class TradingOrchestrator:
avg_loss = model_stats.average_loss if model_stats else None
# Calculate reward for logging
current_pnl = self._get_current_position_pnl(self.symbol)
reward, _ = self._calculate_sophisticated_reward(
predicted_action,
predicted_confidence,
actual_price_change_pct,
time_diff_seconds / 60, # Convert to minutes
has_price_prediction=predicted_price is not None,
symbol=self.symbol,
current_position_pnl=current_pnl,
)
# Enhanced logging with detailed information
@@ -3361,6 +3364,7 @@ class TradingOrchestrator:
) # Default to 0.5 if missing
# Calculate sophisticated reward based on multiple factors
current_pnl = self._get_current_position_pnl(symbol)
reward, was_correct = self._calculate_sophisticated_reward(
predicted_action,
prediction_confidence,
@@ -3369,6 +3373,7 @@ class TradingOrchestrator:
inference_price is not None, # Add price prediction flag
symbol, # Pass symbol for position lookup
None, # Let method determine position status
current_position_pnl=current_pnl,
)
# Update model performance tracking
@@ -3476,10 +3481,11 @@ class TradingOrchestrator:
has_price_prediction: bool = False,
symbol: str = None,
has_position: bool = None,
current_position_pnl: float = 0.0,
) -> tuple[float, bool]:
"""
Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
Now considers position status when evaluating HOLD decisions
Now considers position status and current P&L when evaluating decisions
Args:
predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3489,6 +3495,7 @@ class TradingOrchestrator:
has_price_prediction: Whether the model made a price prediction
symbol: Trading symbol (for position lookup)
has_position: Whether we currently have a position (if None, will be looked up)
current_position_pnl: Current unrealized P&L of open position (0.0 if no position)
Returns:
tuple: (reward, was_correct)
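
For reference, a minimal sketch of the updated call shape (not part of the commit; the symbol, the numeric values, and the name `orchestrator` for a constructed TradingOrchestrator are illustrative assumptions):

# Hypothetical call site: thread the live position P&L into the reward.
current_pnl = orchestrator._get_current_position_pnl("ETH/USDT")
reward, was_correct = orchestrator._calculate_sophisticated_reward(
    "HOLD",    # predicted_action
    0.62,      # prediction confidence
    0.05,      # price change since the prediction, in percent
    1.0,       # elapsed time in minutes
    has_price_prediction=False,
    symbol="ETH/USDT",
    has_position=True,
    current_position_pnl=current_pnl,
)
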
@@ -3500,6 +3507,9 @@ class TradingOrchestrator:
# Determine current position status if not provided
if has_position is None and symbol:
has_position = self._has_open_position(symbol)
# Get current position P&L if we have a position
if has_position and current_position_pnl == 0.0:
current_position_pnl = self._get_current_position_pnl(symbol)
elif has_position is None:
has_position = False
@@ -3518,19 +3528,37 @@ class TradingOrchestrator:
0, -price_change_pct
) # Positive for downward movement
elif predicted_action == "HOLD":
# HOLD evaluation now considers position status
# HOLD evaluation now considers position status AND current P&L
if has_position:
# If we have a position, HOLD is correct if price moved favorably or stayed stable
# This prevents penalizing HOLD when we're already in a profitable position
if price_change_pct > 0: # Price went up while holding - good
was_correct = True
directional_accuracy = price_change_pct # Reward based on profit
elif abs(price_change_pct) < movement_threshold: # Price stable - neutral
was_correct = True
directional_accuracy = movement_threshold - abs(price_change_pct)
else: # Price dropped while holding - bad, but less penalty than wrong direction
was_correct = False
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
# If we have a position, HOLD evaluation depends on P&L and price movement
if current_position_pnl > 0: # Currently profitable position
# Holding a profitable position is good if price continues favorably
if price_change_pct > 0: # Price went up while holding profitable position - excellent
was_correct = True
directional_accuracy = price_change_pct * 1.5 # Bonus for holding winners
elif abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold + (current_position_pnl / 100.0) # Reward based on existing profit
else: # Price dropped while holding profitable position - still okay but less reward
was_correct = True
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
elif current_position_pnl < 0: # Currently losing position
# Holding a losing position is generally bad - should consider closing
if price_change_pct > movement_threshold: # Price recovered - good hold
was_correct = True
directional_accuracy = price_change_pct * 0.8 # Reduced reward for recovery
else: # Price continued down or stayed flat - bad hold
was_correct = False
# Penalty proportional to loss magnitude
directional_accuracy = abs(current_position_pnl / 100.0) * 0.5 # Penalty for holding losers
else: # Breakeven position
# Standard HOLD evaluation for breakeven positions
if abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold - abs(price_change_pct)
else: # Price moved significantly - missed opportunity
was_correct = False
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
else:
# If we don't have a position, HOLD is correct if price stayed relatively stable
was_correct = abs(price_change_pct) < movement_threshold
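
Because the removed and added HOLD branches are interleaved above, here is a self-contained sketch of the resulting in-position HOLD scoring (the function name and the 0.1 default for movement_threshold are assumptions for illustration; it mirrors only the directional_accuracy / was_correct pair):

def score_hold_with_position(price_change_pct: float, position_pnl: float,
                             movement_threshold: float = 0.1) -> tuple[float, bool]:
    """Mirror of the in-position HOLD branches: returns (directional_accuracy, was_correct)."""
    if position_pnl > 0:  # currently profitable position
        if price_change_pct > 0:  # price kept rising while holding a winner
            return price_change_pct * 1.5, True
        if abs(price_change_pct) < movement_threshold:  # price stable
            return movement_threshold + position_pnl / 100.0, True
        # price dropped: still counted correct, but the reward shrinks with the drop
        return max(0.0, position_pnl / 100.0 - abs(price_change_pct) * 0.5), True
    if position_pnl < 0:  # currently losing position
        if price_change_pct > movement_threshold:  # price recovered, acceptable hold
            return price_change_pct * 0.8, True
        return abs(position_pnl / 100.0) * 0.5, False  # penalty for holding losers, scaled by the loss
    # breakeven position: standard stability test
    if abs(price_change_pct) < movement_threshold:
        return movement_threshold - abs(price_change_pct), True
    return max(0.0, movement_threshold - abs(price_change_pct)) * 0.7, False

# score_hold_with_position(0.05, 2.0)   -> (0.075, True)    holding a rising winner
# score_hold_with_position(-0.3, -1.5)  -> (0.0075, False)   holding a sinking loser
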
@@ -3627,12 +3655,16 @@ class TradingOrchestrator:
# Calculate reward if not provided
if sophisticated_reward is None:
symbol = record.get("symbol", self.symbol)
current_pnl = self._get_current_position_pnl(symbol)
sophisticated_reward, _ = self._calculate_sophisticated_reward(
record.get("action", "HOLD"),
record.get("confidence", 0.5),
price_change_pct,
record.get("time_diff_minutes", 1.0),
record.get("has_price_prediction", False),
symbol=symbol,
current_position_pnl=current_pnl,
)
# Train decision fusion model if it's the model being evaluated
@@ -6510,7 +6542,7 @@ class TradingOrchestrator:
logger.error(f"Error getting combined model data for {symbol}: {e}")
return None
def _get_current_position_pnl(self, symbol: str, current_price: float) -> float:
def _get_current_position_pnl(self, symbol: str, current_price: float = None) -> float:
"""Get current position P&L for the symbol"""
try:
if self.trading_executor and hasattr(
@@ -6518,16 +6550,22 @@
):
position = self.trading_executor.get_current_position(symbol)
if position:
entry_price = position.get("price", 0)
size = position.get("size", 0)
side = position.get("side", "LONG")
# If current_price is provided, calculate P&L manually
if current_price is not None:
entry_price = position.get("price", 0)
size = position.get("size", 0)
side = position.get("side", "LONG")
if entry_price and size > 0:
if side.upper() == "LONG":
pnl = (current_price - entry_price) * size
else: # SHORT
pnl = (entry_price - current_price) * size
return pnl
if entry_price and size > 0:
if side.upper() == "LONG":
pnl = (current_price - entry_price) * size
else: # SHORT
pnl = (entry_price - current_price) * size
return pnl
else:
# Use unrealized_pnl from position if available
if position.get("size", 0) > 0:
return float(position.get("unrealized_pnl", 0.0))
return 0.0
except Exception as e:
logger.debug(f"Error getting position P&L for {symbol}: {e}")
@@ -6545,6 +6583,53 @@ class TradingOrchestrator:
except Exception:
return False
def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):
"""
Calculate position-enhanced reward for DQN to incentivize profitable trades and closing losing ones
Args:
base_reward: Original reward from confidence/execution
action: Action taken ('BUY', 'SELL', 'HOLD')
position_pnl: Current position P&L
has_position: Whether we have an open position
Returns:
Enhanced reward that incentivizes profitable behavior
"""
try:
enhanced_reward = base_reward
if has_position and position_pnl != 0.0:
# Position-based reward adjustments (similar to CNN but tuned for DQN)
pnl_factor = position_pnl / 100.0 # Normalize P&L to reasonable scale
if position_pnl > 0: # Profitable position
if action == "HOLD":
# Reward holding profitable positions (let winners run)
enhanced_reward += abs(pnl_factor) * 0.4
elif action in ["BUY", "SELL"]:
# Moderate reward for taking action on profitable positions
enhanced_reward += abs(pnl_factor) * 0.2
elif position_pnl < 0: # Losing position
if action == "HOLD":
# Strong penalty for holding losing positions (cut losses)
enhanced_reward -= abs(pnl_factor) * 1.0
elif action in ["BUY", "SELL"]:
# Strong reward for taking action to close losing positions
enhanced_reward += abs(pnl_factor) * 0.8
# Ensure reward doesn't become extreme (DQN is more sensitive to reward scale)
enhanced_reward = max(-2.0, min(2.0, enhanced_reward))
return enhanced_reward
except Exception as e:
logger.error(f"Error calculating position-enhanced reward for DQN: {e}")
return base_reward
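
A few hand-worked calls make the shaping visible (a usage sketch only; `orchestrator` stands for a constructed TradingOrchestrator and the numbers are made up):

# Holding a winner: small bonus on top of the confidence-based base reward.
orchestrator._calculate_position_enhanced_reward_for_dqn(0.6, "HOLD", 2.0, True)   # 0.6 + 0.02*0.4 = 0.608
# Holding a loser: penalty proportional to the loss.
orchestrator._calculate_position_enhanced_reward_for_dqn(0.1, "HOLD", -3.0, True)  # 0.1 - 0.03*1.0 = 0.070
# Acting while in a losing position: rewarded for moving to close it.
orchestrator._calculate_position_enhanced_reward_for_dqn(0.7, "SELL", -3.0, True)  # 0.7 + 0.03*0.8 = 0.724
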
def _close_all_positions(self):
"""Close all open positions when clearing session"""
try:
@@ -6889,28 +6974,35 @@ class TradingOrchestrator:
action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
dqn_action = action_mapping.get(action, 2)
# Calculate immediate reward based on confidence and execution
immediate_reward = confidence if action != "HOLD" else 0.0
# Get position information for enhanced rewards
has_position = self._has_open_position(symbol)
position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
# Calculate position-enhanced reward
base_reward = confidence if action != "HOLD" else 0.1
enhanced_reward = self._calculate_position_enhanced_reward_for_dqn(
base_reward, action, position_pnl, has_position
)
# Add experience to DQN
self.rl_agent.remember(
state=state,
action=dqn_action,
reward=immediate_reward,
reward=enhanced_reward,
next_state=state, # Will be updated with actual outcome later
done=False,
)
models_trained.append("dqn")
logger.debug(
f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
f"🧠 Added DQN experience: {action} {symbol} (reward: {enhanced_reward:.3f}, P&L: ${position_pnl:.2f})"
)
except Exception as e:
logger.debug(f"Error training DQN on decision: {e}")
# Train CNN model if available and enabled
if self.cnn_model and hasattr(self.cnn_model, "add_training_sample") and self.is_model_training_enabled("cnn"):
if self.cnn_model and hasattr(self.cnn_model, "add_training_data") and self.is_model_training_enabled("cnn"):
try:
# Create CNN input features from base_data (same as inference)
cnn_features = self._create_cnn_features_from_base_data(
@@ -6919,19 +7011,30 @@ class TradingOrchestrator:
# Create target based on action
target_mapping = {
"BUY": [1, 0, 0],
"SELL": [0, 1, 0],
"HOLD": [0, 0, 1],
"BUY": 0, # Action indices for CNN
"SELL": 1,
"HOLD": 2,
}
target = target_mapping.get(action, [0, 0, 1])
target_action = target_mapping.get(action, 2)
# Add training sample
self.cnn_model.add_training_sample(
cnn_features, target, weight=confidence
# Get position information for enhanced rewards
has_position = self._has_open_position(symbol)
position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
# Calculate base reward from confidence and add position-based enhancement
base_reward = confidence if action != "HOLD" else 0.1
# Add training data with position-based reward enhancement
self.cnn_model.add_training_data(
cnn_features,
target_action,
base_reward,
position_pnl=position_pnl,
has_position=has_position
)
models_trained.append("cnn")
logger.debug(f"🔍 Added CNN training sample: {action} {symbol}")
logger.debug(f"🔍 Added CNN training sample: {action} {symbol} (P&L: ${position_pnl:.2f})")
except Exception as e:
logger.debug(f"Error training CNN on decision: {e}")