better rewards, fixed TZ at last
This commit is contained in:
@@ -20,8 +20,10 @@ class TradingEnvironment(gym.Env):
|
||||
window_size: int = 20,
|
||||
risk_aversion: float = 0.2, # Controls how much to penalize volatility
|
||||
price_scaling: str = 'zscore', # 'zscore', 'minmax', or 'raw'
|
||||
reward_scaling: float = 10.0, # Scale factor for rewards
|
||||
episode_penalty: float = 0.1): # Penalty for active positions at end of episode
|
||||
reward_scaling: float = 10.0, # Scale factor for rewards
|
||||
episode_penalty: float = 0.1, # Penalty for active positions at end of episode
|
||||
min_profit_after_fees: float = 0.0005 # Deadzone: require >= 5 bps beyond fees
|
||||
):
|
||||
super(TradingEnvironment, self).__init__()
|
||||
|
||||
self.data = data
|
||||
@@ -33,6 +35,7 @@ class TradingEnvironment(gym.Env):
|
||||
self.price_scaling = price_scaling
|
||||
self.reward_scaling = reward_scaling
|
||||
self.episode_penalty = episode_penalty
|
||||
self.min_profit_after_fees = max(0.0, float(min_profit_after_fees))
|
||||
|
||||
# Preprocess data if needed
|
||||
self._preprocess_data()
|
||||
@@ -177,8 +180,14 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = current_price - self.entry_price
|
||||
pnl = price_diff / self.entry_price - 2 * self.fee_rate # Account for entry and exit fees
|
||||
|
||||
# Adjust reward based on PnL and risk
|
||||
reward = pnl * self.reward_scaling
|
||||
# Deadzone to discourage micro profits
|
||||
if pnl > 0 and pnl < self.min_profit_after_fees:
|
||||
reward = -self.fee_rate
|
||||
elif pnl < 0 and abs(pnl) < self.min_profit_after_fees:
|
||||
reward = pnl * self.reward_scaling * 0.5
|
||||
else:
|
||||
effective_pnl = pnl - (self.min_profit_after_fees if pnl > 0 else 0.0)
|
||||
reward = effective_pnl * self.reward_scaling
|
||||
|
||||
# Track trade performance
|
||||
self.total_trades += 1
|
||||
@@ -212,8 +221,12 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = current_price - self.entry_price
|
||||
unrealized_pnl = price_diff / self.entry_price
|
||||
|
||||
# Small reward/penalty based on unrealized P&L
|
||||
reward = unrealized_pnl * 0.05 # Scale down to encourage holding good positions
|
||||
# Encourage holding only if unrealized edge exceeds deadzone
|
||||
unrealized_edge = unrealized_pnl
|
||||
if abs(unrealized_edge) >= self.min_profit_after_fees:
|
||||
reward = unrealized_edge * (self.reward_scaling * 0.2)
|
||||
else:
|
||||
reward = -0.0002
|
||||
|
||||
elif self.position < 0: # Short position
|
||||
if action == 0: # BUY (close short)
|
||||
@@ -221,8 +234,13 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = self.entry_price - current_price
|
||||
pnl = price_diff / self.entry_price - 2 * self.fee_rate # Account for entry and exit fees
|
||||
|
||||
# Adjust reward based on PnL and risk
|
||||
reward = pnl * self.reward_scaling
|
||||
if pnl > 0 and pnl < self.min_profit_after_fees:
|
||||
reward = -self.fee_rate
|
||||
elif pnl < 0 and abs(pnl) < self.min_profit_after_fees:
|
||||
reward = pnl * self.reward_scaling * 0.5
|
||||
else:
|
||||
effective_pnl = pnl - (self.min_profit_after_fees if pnl > 0 else 0.0)
|
||||
reward = effective_pnl * self.reward_scaling
|
||||
|
||||
# Track trade performance
|
||||
self.total_trades += 1
|
||||
@@ -256,8 +274,12 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = self.entry_price - current_price
|
||||
unrealized_pnl = price_diff / self.entry_price
|
||||
|
||||
# Small reward/penalty based on unrealized P&L
|
||||
reward = unrealized_pnl * 0.05 # Scale down to encourage holding good positions
|
||||
# Encourage holding only if unrealized edge exceeds deadzone
|
||||
unrealized_edge = unrealized_pnl
|
||||
if abs(unrealized_edge) >= self.min_profit_after_fees:
|
||||
reward = unrealized_edge * (self.reward_scaling * 0.2)
|
||||
else:
|
||||
reward = -0.0002
|
||||
|
||||
# Record the action
|
||||
self.actions_taken.append(action)
|
||||
|
Reference in New Issue
Block a user