artificially double fees to promote more profitable trades
@@ -91,33 +91,79 @@ class RewardCalculator:
        return 0.0

    def calculate_enhanced_reward(self, action, price_change, position_held_time=0, volatility=None, is_profitable=False, confidence=0.0, predicted_change=0.0, actual_change=0.0, current_pnl=0.0, symbol='UNKNOWN'):
        """Calculate enhanced reward for trading actions"""
        """Calculate enhanced reward for trading actions with shifted neutral point

        Neutral reward is shifted to require profits that exceed double the fees,
        which penalizes small profit trades and encourages holding for larger moves.
        Current PnL is given more weight in the decision-making process.
        """
        fee = self.base_fee_rate
        double_fee = fee * 4  # Double the fees (2x open + 2x close = 4x base fee)
        frequency_penalty = self._calculate_frequency_penalty()

        if action == 0:  # Buy
            # Penalize buying more when already in profit
            reward = -fee - frequency_penalty
            if current_pnl > 0:
                # Reduce incentive to close profitable positions
                reward -= current_pnl * 0.2
        elif action == 1:  # Sell
            profit_pct = price_change
            net_profit = profit_pct - (fee * 2)
            reward = net_profit * self.reward_scaling

            # Shift neutral point - require profit > double fees to be considered positive
            net_profit = profit_pct - double_fee

            # Scale reward based on profit size
            if net_profit > 0:
                # Exponential reward for larger profits
                reward = (net_profit ** 1.5) * self.reward_scaling
            else:
                # Linear penalty for losses
                reward = net_profit * self.reward_scaling

            reward -= frequency_penalty
            self.record_pnl(net_profit)

            # Add extra penalty for very small profits (less than 3x fees)
            if 0 < profit_pct < (fee * 6):
                reward -= 0.5  # Discourage tiny profit-taking
        else:  # Hold
            if is_profitable:
                reward = self._calculate_holding_reward(position_held_time, price_change)
                # Increase reward for holding profitable positions
                profit_factor = min(5.0, current_pnl * 20)  # Cap at 5x
                reward = self._calculate_holding_reward(position_held_time, price_change) * (1.0 + profit_factor)

                # Add bonus for holding through volatility when profitable
                if volatility is not None and volatility > 0.001:
                    reward += 0.1 * volatility * 100
            else:
                reward = -0.0001
                # Small penalty for holding losing positions
                loss_factor = min(1.0, abs(current_pnl) * 10)
                reward = -0.0001 * (1.0 + loss_factor)

                # But reduce penalty for very recent positions (give them time)
                if position_held_time < 30:  # Less than 30 seconds
                    reward *= 0.5

        # Prediction accuracy reward component
        if action in [0, 1] and predicted_change != 0:
            if (action == 0 and actual_change > 0) or (action == 1 and actual_change < 0):
                reward += abs(actual_change) * 5.0
            else:
                reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1

        # Increase weight of current PnL in decision making (3x more than before)
        reward += current_pnl * 0.3

        # Volatility penalty
        if volatility is not None:
            reward -= abs(volatility) * 100

        # Risk adjustment
        if self.risk_aversion > 0 and len(self.returns) > 1:
            returns_std = np.std(self.returns)
            reward -= returns_std * self.risk_aversion

        self.record_trade(action)
        return reward
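To make the shifted neutral point concrete, here is a minimal sketch (not part of the commit) that replays just the sell-branch arithmetic above, assuming base_fee_rate = 0.001 and reward_scaling = 1.0; both attributes appear in the diff, but the values are illustrative guesses, and the frequency-penalty and PnL terms of the full method are omitted.

# Minimal sketch, not part of the commit: assumed fee and scaling values.
fee = 0.001            # assumed base_fee_rate
reward_scaling = 1.0   # assumed reward_scaling
double_fee = fee * 4   # doubled round-trip fees, as in the diff

for profit_pct in (0.002, 0.005, 0.02):
    net_profit = profit_pct - double_fee
    if net_profit > 0:
        reward = (net_profit ** 1.5) * reward_scaling  # exponential reward for larger wins
    else:
        reward = net_profit * reward_scaling           # linear penalty otherwise
    if 0 < profit_pct < fee * 6:
        reward -= 0.5                                  # extra penalty for tiny profit-taking
    print(f"profit={profit_pct:.3f}  net={net_profit:+.4f}  reward={reward:+.5f}")

Under these assumptions a 0.2% or 0.5% move still ends with a negative reward, because it either falls below the doubled fee threshold or lands inside the small-profit band, while the 2% move clears both and receives the exponentially scaled positive reward.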