PnL in reward, show leveraged power in dash (broken)
@@ -3267,12 +3267,15 @@ class TradingOrchestrator:
         avg_loss = model_stats.average_loss if model_stats else None

         # Calculate reward for logging
+        current_pnl = self._get_current_position_pnl(self.symbol)
         reward, _ = self._calculate_sophisticated_reward(
             predicted_action,
             predicted_confidence,
             actual_price_change_pct,
             time_diff_seconds / 60, # Convert to minutes
             has_price_prediction=predicted_price is not None,
+            symbol=self.symbol,
+            current_position_pnl=current_pnl,
         )

         # Enhanced logging with detailed information
@@ -3361,6 +3364,7 @@ class TradingOrchestrator:
             ) # Default to 0.5 if missing

         # Calculate sophisticated reward based on multiple factors
+        current_pnl = self._get_current_position_pnl(symbol)
         reward, was_correct = self._calculate_sophisticated_reward(
             predicted_action,
             prediction_confidence,
@@ -3369,6 +3373,7 @@ class TradingOrchestrator:
             inference_price is not None, # Add price prediction flag
             symbol, # Pass symbol for position lookup
             None, # Let method determine position status
+            current_position_pnl=current_pnl,
         )

         # Update model performance tracking
@@ -3476,10 +3481,11 @@ class TradingOrchestrator:
         has_price_prediction: bool = False,
         symbol: str = None,
         has_position: bool = None,
+        current_position_pnl: float = 0.0,
     ) -> tuple[float, bool]:
         """
         Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
-        Now considers position status when evaluating HOLD decisions
+        Now considers position status and current P&L when evaluating decisions

         Args:
             predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3489,6 +3495,7 @@ class TradingOrchestrator:
             has_price_prediction: Whether the model made a price prediction
             symbol: Trading symbol (for position lookup)
             has_position: Whether we currently have a position (if None, will be looked up)
+            current_position_pnl: Current unrealized P&L of open position (0.0 if no position)

         Returns:
             tuple: (reward, was_correct)
@@ -3500,6 +3507,9 @@ class TradingOrchestrator:
         # Determine current position status if not provided
         if has_position is None and symbol:
             has_position = self._has_open_position(symbol)
+            # Get current position P&L if we have a position
+            if has_position and current_position_pnl == 0.0:
+                current_position_pnl = self._get_current_position_pnl(symbol)
         elif has_position is None:
             has_position = False

@@ -3518,19 +3528,37 @@ class TradingOrchestrator:
                 0, -price_change_pct
             ) # Positive for downward movement
         elif predicted_action == "HOLD":
-            # HOLD evaluation now considers position status
+            # HOLD evaluation now considers position status AND current P&L
             if has_position:
-                # If we have a position, HOLD is correct if price moved favorably or stayed stable
-                # This prevents penalizing HOLD when we're already in a profitable position
-                if price_change_pct > 0: # Price went up while holding - good
-                    was_correct = True
-                    directional_accuracy = price_change_pct # Reward based on profit
-                elif abs(price_change_pct) < movement_threshold: # Price stable - neutral
-                    was_correct = True
-                    directional_accuracy = movement_threshold - abs(price_change_pct)
-                else: # Price dropped while holding - bad, but less penalty than wrong direction
-                    was_correct = False
-                    directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
+                # If we have a position, HOLD evaluation depends on P&L and price movement
+                if current_position_pnl > 0: # Currently profitable position
+                    # Holding a profitable position is good if price continues favorably
+                    if price_change_pct > 0: # Price went up while holding profitable position - excellent
+                        was_correct = True
+                        directional_accuracy = price_change_pct * 1.5 # Bonus for holding winners
+                    elif abs(price_change_pct) < movement_threshold: # Price stable - good
+                        was_correct = True
+                        directional_accuracy = movement_threshold + (current_position_pnl / 100.0) # Reward based on existing profit
+                    else: # Price dropped while holding profitable position - still okay but less reward
+                        was_correct = True
+                        directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
+                elif current_position_pnl < 0: # Currently losing position
+                    # Holding a losing position is generally bad - should consider closing
+                    if price_change_pct > movement_threshold: # Price recovered - good hold
+                        was_correct = True
+                        directional_accuracy = price_change_pct * 0.8 # Reduced reward for recovery
+                    else: # Price continued down or stayed flat - bad hold
+                        was_correct = False
+                        # Penalty proportional to loss magnitude
+                        directional_accuracy = abs(current_position_pnl / 100.0) * 0.5 # Penalty for holding losers
+                else: # Breakeven position
+                    # Standard HOLD evaluation for breakeven positions
+                    if abs(price_change_pct) < movement_threshold: # Price stable - good
+                        was_correct = True
+                        directional_accuracy = movement_threshold - abs(price_change_pct)
+                    else: # Price moved significantly - missed opportunity
+                        was_correct = False
+                        directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
             else:
                 # If we don't have a position, HOLD is correct if price stayed relatively stable
                 was_correct = abs(price_change_pct) < movement_threshold
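The HOLD branch above now splits on the open position's P&L before looking at price movement. Below is a minimal standalone sketch of that branching for illustration only: the movement_threshold value and the no-position accuracy term are assumed, and the real method goes on to combine these components with confidence and timing.

# Standalone sketch of the HOLD branch above (not the orchestrator method itself);
# movement_threshold=0.1 and the no-position accuracy term are assumed values.
def hold_reward_components(price_change_pct: float,
                           current_position_pnl: float,
                           has_position: bool,
                           movement_threshold: float = 0.1):
    if not has_position:
        # No position: HOLD is correct only if price stayed roughly flat
        was_correct = abs(price_change_pct) < movement_threshold
        return was_correct, max(0.0, movement_threshold - abs(price_change_pct))
    if current_position_pnl > 0:  # holding a winner
        if price_change_pct > 0:
            return True, price_change_pct * 1.5
        if abs(price_change_pct) < movement_threshold:
            return True, movement_threshold + current_position_pnl / 100.0
        return True, max(0.0, current_position_pnl / 100.0 - abs(price_change_pct) * 0.5)
    if current_position_pnl < 0:  # holding a loser
        if price_change_pct > movement_threshold:
            return True, price_change_pct * 0.8
        return False, abs(current_position_pnl / 100.0) * 0.5
    # breakeven position: standard stability check
    if abs(price_change_pct) < movement_threshold:
        return True, movement_threshold - abs(price_change_pct)
    return False, max(0.0, movement_threshold - abs(price_change_pct)) * 0.7

print(hold_reward_components(0.5, current_position_pnl=12.0, has_position=True))   # (True, 0.75) - winner kept running
print(hold_reward_components(-0.5, current_position_pnl=-8.0, has_position=True))  # (False, 0.04) - loser held too long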
@@ -3627,12 +3655,16 @@ class TradingOrchestrator:

         # Calculate reward if not provided
         if sophisticated_reward is None:
+            symbol = record.get("symbol", self.symbol)
+            current_pnl = self._get_current_position_pnl(symbol)
             sophisticated_reward, _ = self._calculate_sophisticated_reward(
                 record.get("action", "HOLD"),
                 record.get("confidence", 0.5),
                 price_change_pct,
                 record.get("time_diff_minutes", 1.0),
                 record.get("has_price_prediction", False),
+                symbol=symbol,
+                current_position_pnl=current_pnl,
             )

         # Train decision fusion model if it's the model being evaluated
@@ -6510,7 +6542,7 @@ class TradingOrchestrator:
             logger.error(f"Error getting combined model data for {symbol}: {e}")
             return None

-    def _get_current_position_pnl(self, symbol: str, current_price: float) -> float:
+    def _get_current_position_pnl(self, symbol: str, current_price: float = None) -> float:
         """Get current position P&L for the symbol"""
         try:
             if self.trading_executor and hasattr(
@@ -6518,16 +6550,22 @@ class TradingOrchestrator:
             ):
                 position = self.trading_executor.get_current_position(symbol)
                 if position:
-                    entry_price = position.get("price", 0)
-                    size = position.get("size", 0)
-                    side = position.get("side", "LONG")
+                    # If current_price is provided, calculate P&L manually
+                    if current_price is not None:
+                        entry_price = position.get("price", 0)
+                        size = position.get("size", 0)
+                        side = position.get("side", "LONG")

-                    if entry_price and size > 0:
-                        if side.upper() == "LONG":
-                            pnl = (current_price - entry_price) * size
-                        else: # SHORT
-                            pnl = (entry_price - current_price) * size
-                        return pnl
+                        if entry_price and size > 0:
+                            if side.upper() == "LONG":
+                                pnl = (current_price - entry_price) * size
+                            else: # SHORT
+                                pnl = (entry_price - current_price) * size
+                            return pnl
+                    else:
+                        # Use unrealized_pnl from position if available
+                        if position.get("size", 0) > 0:
+                            return float(position.get("unrealized_pnl", 0.0))
             return 0.0
         except Exception as e:
             logger.debug(f"Error getting position P&L for {symbol}: {e}")
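With the new optional current_price argument, P&L is computed directly from the stored entry instead of relying on the executor's unrealized_pnl field. A small sketch of that arithmetic, assuming the same position dict shape (price/size/side keys) the diff reads from get_current_position:

# Sketch of the manual P&L arithmetic above; the position dict shape is assumed
# to mirror what get_current_position returns.
def manual_position_pnl(position: dict, current_price: float) -> float:
    entry_price = position.get("price", 0)
    size = position.get("size", 0)
    side = position.get("side", "LONG")
    if not entry_price or size <= 0:
        return 0.0
    if side.upper() == "LONG":
        return (current_price - entry_price) * size
    return (entry_price - current_price) * size  # SHORT

print(manual_position_pnl({"price": 3000.0, "size": 0.5, "side": "LONG"}, 3040.0))   # 20.0
print(manual_position_pnl({"price": 3000.0, "size": 0.5, "side": "SHORT"}, 3040.0))  # -20.0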
@@ -6545,6 +6583,53 @@ class TradingOrchestrator:
         except Exception:
             return False

+
+
+    def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward for DQN to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from confidence/execution
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments (similar to CNN but tuned for DQN)
+                pnl_factor = position_pnl / 100.0 # Normalize P&L to reasonable scale
+
+                if position_pnl > 0: # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.4
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.2
+
+                elif position_pnl < 0: # Losing position
+                    if action == "HOLD":
+                        # Strong penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 1.0
+                    elif action in ["BUY", "SELL"]:
+                        # Strong reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.8
+
+            # Ensure reward doesn't become extreme (DQN is more sensitive to reward scale)
+            enhanced_reward = max(-2.0, min(2.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward for DQN: {e}")
+            return base_reward
+
     def _close_all_positions(self):
         """Close all open positions when clearing session"""
         try:
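A condensed re-statement of the shaping rules above, handy for sanity-checking the multipliers with illustrative numbers. This is a sketch rather than the orchestrator method; it treats BUY/SELL as the only non-HOLD actions, which the original handles via an explicit elif.

# Sketch of the DQN reward shaping above, used to verify the multipliers
# with a few assumed, illustrative inputs.
def dqn_shaped_reward(base_reward, action, position_pnl, has_position):
    reward = base_reward
    if has_position and position_pnl != 0.0:
        pnl_factor = abs(position_pnl) / 100.0
        if position_pnl > 0:
            reward += pnl_factor * (0.4 if action == "HOLD" else 0.2)  # favor letting winners run
        elif action == "HOLD":
            reward -= pnl_factor * 1.0  # punish sitting on a loser
        else:
            reward += pnl_factor * 0.8  # reward acting on a loser
    return max(-2.0, min(2.0, reward))  # keep the scale DQN-friendly

assert abs(dqn_shaped_reward(0.1, "HOLD", -50.0, True) - (-0.4)) < 1e-9  # holding a loser is punished
assert abs(dqn_shaped_reward(0.7, "SELL", -50.0, True) - 1.1) < 1e-9     # closing a loser is rewarded
assert abs(dqn_shaped_reward(0.1, "HOLD", 30.0, True) - 0.22) < 1e-9     # letting a winner run is rewarded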
@@ -6889,28 +6974,35 @@ class TradingOrchestrator:
                 action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
                 dqn_action = action_mapping.get(action, 2)

-                # Calculate immediate reward based on confidence and execution
-                immediate_reward = confidence if action != "HOLD" else 0.0
+                # Get position information for enhanced rewards
+                has_position = self._has_open_position(symbol)
+                position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                # Calculate position-enhanced reward
+                base_reward = confidence if action != "HOLD" else 0.1
+                enhanced_reward = self._calculate_position_enhanced_reward_for_dqn(
+                    base_reward, action, position_pnl, has_position
+                )

                 # Add experience to DQN
                 self.rl_agent.remember(
                     state=state,
                     action=dqn_action,
-                    reward=immediate_reward,
+                    reward=enhanced_reward,
                     next_state=state, # Will be updated with actual outcome later
                     done=False,
                 )

                 models_trained.append("dqn")
                 logger.debug(
-                    f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
+                    f"🧠 Added DQN experience: {action} {symbol} (reward: {enhanced_reward:.3f}, P&L: ${position_pnl:.2f})"
                 )

             except Exception as e:
                 logger.debug(f"Error training DQN on decision: {e}")

         # Train CNN model if available and enabled
-        if self.cnn_model and hasattr(self.cnn_model, "add_training_sample") and self.is_model_training_enabled("cnn"):
+        if self.cnn_model and hasattr(self.cnn_model, "add_training_data") and self.is_model_training_enabled("cnn"):
             try:
                 # Create CNN input features from base_data (same as inference)
                 cnn_features = self._create_cnn_features_from_base_data(
@@ -6919,19 +7011,30 @@ class TradingOrchestrator:

                 # Create target based on action
                 target_mapping = {
-                    "BUY": [1, 0, 0],
-                    "SELL": [0, 1, 0],
-                    "HOLD": [0, 0, 1],
+                    "BUY": 0, # Action indices for CNN
+                    "SELL": 1,
+                    "HOLD": 2,
                 }
-                target = target_mapping.get(action, [0, 0, 1])
+                target_action = target_mapping.get(action, 2)

-                # Add training sample
-                self.cnn_model.add_training_sample(
-                    cnn_features, target, weight=confidence
+                # Get position information for enhanced rewards
+                has_position = self._has_open_position(symbol)
+                position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0
+
+                # Calculate base reward from confidence and add position-based enhancement
+                base_reward = confidence if action != "HOLD" else 0.1
+
+                # Add training data with position-based reward enhancement
+                self.cnn_model.add_training_data(
+                    cnn_features,
+                    target_action,
+                    base_reward,
+                    position_pnl=position_pnl,
+                    has_position=has_position
                 )

                 models_trained.append("cnn")
-                logger.debug(f"🔍 Added CNN training sample: {action} {symbol}")
+                logger.debug(f"🔍 Added CNN training sample: {action} {symbol} (P&L: ${position_pnl:.2f})")

             except Exception as e:
                 logger.debug(f"Error training CNN on decision: {e}")
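The CNN hunk also switches the training target from one-hot vectors to class indices. The two encodings carry the same information, as this small check illustrates (numpy is used here purely for illustration, not because the diff requires it):

# Quick check that the old one-hot targets and the new class indices are equivalent.
import numpy as np

one_hot_targets = {"BUY": [1, 0, 0], "SELL": [0, 1, 0], "HOLD": [0, 0, 1]}  # old encoding
index_targets = {"BUY": 0, "SELL": 1, "HOLD": 2}                            # new encoding

for action in ("BUY", "SELL", "HOLD"):
    assert int(np.argmax(one_hot_targets[action])) == index_targets[action]
    assert np.array_equal(np.eye(3)[index_targets[action]], np.array(one_hot_targets[action]))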