PnL in reward, show leveraged power in dash (broken)
@@ -3,6 +3,7 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 import os
+import time
 import logging
 import torch.nn.functional as F
 from typing import List, Tuple, Dict, Any, Optional, Union
@@ -652,20 +653,30 @@ class EnhancedCNN(nn.Module):
             'weighted_strength': 0.0
         }

-    def add_training_data(self, state, action, reward):
+    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
         """
-        Add training data to the model's training buffer
+        Add training data to the model's training buffer with position-based reward enhancement

         Args:
             state: Input state
             action: Action taken
-            reward: Reward received
+            reward: Base reward received
+            position_pnl: Current position P&L (0.0 if no position)
+            has_position: Whether we currently have an open position
         """
         try:
+            # Enhance reward based on position status
+            enhanced_reward = self._calculate_position_enhanced_reward(
+                reward, action, position_pnl, has_position
+            )
+
             self.training_data.append({
                 'state': state,
                 'action': action,
-                'reward': reward,
+                'reward': enhanced_reward,
+                'base_reward': reward,  # Keep original reward for analysis
+                'position_pnl': position_pnl,
+                'has_position': has_position,
                 'timestamp': time.time()
             })

@@ -675,6 +686,51 @@ class EnhancedCNN(nn.Module):

         except Exception as e:
             logger.error(f"Error adding training data: {e}")

+    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from price prediction accuracy
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.5
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.3
+
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 0.8
+                    elif action in ["BUY", "SELL"]:
+                        # Reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.6
+
+                # Ensure reward doesn't become extreme
+                enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward: {e}")
+            return base_reward
+
     def save(self, path):
         """Save model weights and architecture"""
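
To make the new shaping rule concrete, here is a standalone restatement of the logic added in _calculate_position_enhanced_reward, with a few worked cases. This is an illustrative sketch, not code from the commit; it mirrors the diff's constants (P&L divided by 100, bonuses of 0.5 / 0.3, penalty of 0.8, closing bonus of 0.6, clamp to [-5, 5]).

def shape_reward(base_reward, action, position_pnl, has_position):
    """Illustrative mirror of the position-based shaping added in this commit."""
    reward = base_reward
    if has_position and position_pnl != 0.0:
        pnl_factor = abs(position_pnl) / 100.0      # P&L normalized by 100, as in the diff
        if position_pnl > 0:                        # profitable position
            if action == "HOLD":
                reward += pnl_factor * 0.5          # let winners run
            elif action in ("BUY", "SELL"):
                reward += pnl_factor * 0.3
        else:                                       # losing position
            if action == "HOLD":
                reward -= pnl_factor * 0.8          # discourage holding losers
            elif action in ("BUY", "SELL"):
                reward += pnl_factor * 0.6          # encourage closing losers
        reward = max(-5.0, min(5.0, reward))        # clamp to [-5, 5]
    return reward

# Worked cases, all with a base reward of 0.10 and a $5 unrealized P&L:
print(shape_reward(0.10, "HOLD", +5.0, True))   # 0.10 + 0.05 * 0.5 ≈ 0.125 (hold a winner)
print(shape_reward(0.10, "HOLD", -5.0, True))   # 0.10 - 0.05 * 0.8 ≈ 0.06  (hold a loser)
print(shape_reward(0.10, "SELL", -5.0, True))   # 0.10 + 0.05 * 0.6 ≈ 0.13  (close a loser)
print(shape_reward(0.10, "BUY", 0.0, False))    # 0.10 (no open position: unchanged)

Note the asymmetry: holding a losing position is penalized (0.8) more strongly than holding a winner is rewarded (0.5), which is the cut-losses / let-winners-run bias the docstring describes.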
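A hypothetical caller-side sketch of how a training loop could supply the new position_pnl / has_position arguments follows. The Position helper and the stub model are illustrative stand-ins, not part of this commit or of EnhancedCNN; only the add_training_data signature matches the diff.

import time
from dataclasses import dataclass
from typing import Optional

@dataclass
class Position:
    entry_price: float
    size: float                       # positive = long, negative = short

def unrealized_pnl(position: Optional[Position], price: float) -> float:
    return 0.0 if position is None else (price - position.entry_price) * position.size

class StubModel:
    """Stand-in exposing the same add_training_data signature as the diff (not EnhancedCNN)."""
    def __init__(self):
        self.training_data = []

    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
        # The real EnhancedCNN shapes the reward first; this stub just records the call.
        self.training_data.append({'state': state, 'action': action, 'reward': reward,
                                   'position_pnl': position_pnl, 'has_position': has_position,
                                   'timestamp': time.time()})

model = StubModel()
position = Position(entry_price=3000.0, size=0.1)      # example open long
last_price = 3042.0

model.add_training_data(
    state=[0.0] * 10,                                   # placeholder feature vector
    action="HOLD",
    reward=0.12,                                        # base reward from prediction accuracy
    position_pnl=unrealized_pnl(position, last_price),  # (3042 - 3000) * 0.1 ≈ 4.2
    has_position=position is not None,
)

Because position_pnl defaults to 0.0 and has_position to False, existing call sites that pass only (state, action, reward) keep working; without position context the shaping step returns the base reward unchanged.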