PnL in reward, show leveraged power in dash (broken)
@@ -3,6 +3,7 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 import os
+import time
 import logging
 import torch.nn.functional as F
 from typing import List, Tuple, Dict, Any, Optional, Union
@@ -652,20 +653,30 @@ class EnhancedCNN(nn.Module):
             'weighted_strength': 0.0
         }

-    def add_training_data(self, state, action, reward):
+    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
         """
-        Add training data to the model's training buffer
+        Add training data to the model's training buffer with position-based reward enhancement

         Args:
             state: Input state
             action: Action taken
-            reward: Reward received
+            reward: Base reward received
+            position_pnl: Current position P&L (0.0 if no position)
+            has_position: Whether we currently have an open position
         """
         try:
+            # Enhance reward based on position status
+            enhanced_reward = self._calculate_position_enhanced_reward(
+                reward, action, position_pnl, has_position
+            )
+
             self.training_data.append({
                 'state': state,
                 'action': action,
-                'reward': reward,
+                'reward': enhanced_reward,
+                'base_reward': reward,  # Keep original reward for analysis
+                'position_pnl': position_pnl,
+                'has_position': has_position,
                 'timestamp': time.time()
             })

@@ -675,6 +686,51 @@ class EnhancedCNN(nn.Module):

         except Exception as e:
             logger.error(f"Error adding training data: {e}")

+    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
+        """
+        Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
+
+        Args:
+            base_reward: Original reward from price prediction accuracy
+            action: Action taken ('BUY', 'SELL', 'HOLD')
+            position_pnl: Current position P&L
+            has_position: Whether we have an open position
+
+        Returns:
+            Enhanced reward that incentivizes profitable behavior
+        """
+        try:
+            enhanced_reward = base_reward
+
+            if has_position and position_pnl != 0.0:
+                # Position-based reward adjustments
+                pnl_factor = position_pnl / 100.0  # Normalize P&L to reasonable scale
+
+                if position_pnl > 0:  # Profitable position
+                    if action == "HOLD":
+                        # Reward holding profitable positions (let winners run)
+                        enhanced_reward += abs(pnl_factor) * 0.5
+                    elif action in ["BUY", "SELL"]:
+                        # Moderate reward for taking action on profitable positions
+                        enhanced_reward += abs(pnl_factor) * 0.3
+
+                elif position_pnl < 0:  # Losing position
+                    if action == "HOLD":
+                        # Penalty for holding losing positions (cut losses)
+                        enhanced_reward -= abs(pnl_factor) * 0.8
+                    elif action in ["BUY", "SELL"]:
+                        # Reward for taking action to close losing positions
+                        enhanced_reward += abs(pnl_factor) * 0.6
+
+                # Ensure reward doesn't become extreme
+                enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
+
+            return enhanced_reward
+
+        except Exception as e:
+            logger.error(f"Error calculating position-enhanced reward: {e}")
+            return base_reward
+
     def save(self, path):
         """Save model weights and architecture"""
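
To make the new shaping rule concrete, here is a standalone restatement of the logic added in _calculate_position_enhanced_reward, with a few worked cases. This is an illustrative sketch, not code from the commit; it mirrors the diff's constants (P&L divided by 100, bonuses of 0.5 / 0.3, penalty of 0.8, closing bonus of 0.6, clamp to [-5, 5]).

def shape_reward(base_reward, action, position_pnl, has_position):
    """Illustrative mirror of the position-based shaping added in this commit."""
    reward = base_reward
    if has_position and position_pnl != 0.0:
        pnl_factor = abs(position_pnl) / 100.0      # P&L normalized by 100, as in the diff
        if position_pnl > 0:                        # profitable position
            if action == "HOLD":
                reward += pnl_factor * 0.5          # let winners run
            elif action in ("BUY", "SELL"):
                reward += pnl_factor * 0.3
        else:                                       # losing position
            if action == "HOLD":
                reward -= pnl_factor * 0.8          # discourage holding losers
            elif action in ("BUY", "SELL"):
                reward += pnl_factor * 0.6          # encourage closing losers
        reward = max(-5.0, min(5.0, reward))        # clamp to [-5, 5]
    return reward

# Worked cases, all with a base reward of 0.10 and a $5 unrealized P&L:
print(shape_reward(0.10, "HOLD", +5.0, True))   # 0.10 + 0.05 * 0.5 ≈ 0.125 (hold a winner)
print(shape_reward(0.10, "HOLD", -5.0, True))   # 0.10 - 0.05 * 0.8 ≈ 0.06  (hold a loser)
print(shape_reward(0.10, "SELL", -5.0, True))   # 0.10 + 0.05 * 0.6 ≈ 0.13  (close a loser)
print(shape_reward(0.10, "BUY", 0.0, False))    # 0.10 (no open position: unchanged)

Note the asymmetry: holding a losing position is penalized (0.8) more strongly than holding a winner is rewarded (0.5), which is the cut-losses / let-winners-run bias the docstring describes.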
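A hypothetical caller-side sketch of how a training loop could supply the new position_pnl / has_position arguments follows. The Position helper and the stub model are illustrative stand-ins, not part of this commit or of EnhancedCNN; only the add_training_data signature matches the diff.

import time
from dataclasses import dataclass
from typing import Optional

@dataclass
class Position:
    entry_price: float
    size: float                       # positive = long, negative = short

def unrealized_pnl(position: Optional[Position], price: float) -> float:
    return 0.0 if position is None else (price - position.entry_price) * position.size

class StubModel:
    """Stand-in exposing the same add_training_data signature as the diff (not EnhancedCNN)."""
    def __init__(self):
        self.training_data = []

    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
        # The real EnhancedCNN shapes the reward first; this stub just records the call.
        self.training_data.append({'state': state, 'action': action, 'reward': reward,
                                   'position_pnl': position_pnl, 'has_position': has_position,
                                   'timestamp': time.time()})

model = StubModel()
position = Position(entry_price=3000.0, size=0.1)      # example open long
last_price = 3042.0

model.add_training_data(
    state=[0.0] * 10,                                   # placeholder feature vector
    action="HOLD",
    reward=0.12,                                        # base reward from prediction accuracy
    position_pnl=unrealized_pnl(position, last_price),  # (3042 - 3000) * 0.1 ≈ 4.2
    has_position=position is not None,
)

Because position_pnl defaults to 0.0 and has_position to False, existing call sites that pass only (state, action, reward) keep working; without position context the shaping step returns the base reward unchanged.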