artificially doule fees to promote more profitable trades

This commit is contained in:
Dobromir Popov
2025-07-17 19:22:35 +03:00
parent 6d55061e86
commit 26d440f772
3 changed files with 440 additions and 52 deletions

View File

@ -91,33 +91,79 @@ class RewardCalculator:
return 0.0
def calculate_enhanced_reward(self, action, price_change, position_held_time=0, volatility=None, is_profitable=False, confidence=0.0, predicted_change=0.0, actual_change=0.0, current_pnl=0.0, symbol='UNKNOWN'):
"""Calculate enhanced reward for trading actions"""
"""Calculate enhanced reward for trading actions with shifted neutral point
Neutral reward is shifted to require profits that exceed double the fees,
which penalizes small profit trades and encourages holding for larger moves.
Current PnL is given more weight in the decision-making process.
"""
fee = self.base_fee_rate
double_fee = fee * 4 # Double the fees (2x open + 2x close = 4x base fee)
frequency_penalty = self._calculate_frequency_penalty()
if action == 0: # Buy
# Penalize buying more when already in profit
reward = -fee - frequency_penalty
if current_pnl > 0:
# Reduce incentive to close profitable positions
reward -= current_pnl * 0.2
elif action == 1: # Sell
profit_pct = price_change
net_profit = profit_pct - (fee * 2)
reward = net_profit * self.reward_scaling
# Shift neutral point - require profit > double fees to be considered positive
net_profit = profit_pct - double_fee
# Scale reward based on profit size
if net_profit > 0:
# Exponential reward for larger profits
reward = (net_profit ** 1.5) * self.reward_scaling
else:
# Linear penalty for losses
reward = net_profit * self.reward_scaling
reward -= frequency_penalty
self.record_pnl(net_profit)
# Add extra penalty for very small profits (less than 3x fees)
if 0 < profit_pct < (fee * 6):
reward -= 0.5 # Discourage tiny profit-taking
else: # Hold
if is_profitable:
reward = self._calculate_holding_reward(position_held_time, price_change)
# Increase reward for holding profitable positions
profit_factor = min(5.0, current_pnl * 20) # Cap at 5x
reward = self._calculate_holding_reward(position_held_time, price_change) * (1.0 + profit_factor)
# Add bonus for holding through volatility when profitable
if volatility is not None and volatility > 0.001:
reward += 0.1 * volatility * 100
else:
reward = -0.0001
# Small penalty for holding losing positions
loss_factor = min(1.0, abs(current_pnl) * 10)
reward = -0.0001 * (1.0 + loss_factor)
# But reduce penalty for very recent positions (give them time)
if position_held_time < 30: # Less than 30 seconds
reward *= 0.5
# Prediction accuracy reward component
if action in [0, 1] and predicted_change != 0:
if (action == 0 and actual_change > 0) or (action == 1 and actual_change < 0):
reward += abs(actual_change) * 5.0
else:
reward -= abs(predicted_change) * 2.0
reward += current_pnl * 0.1
# Increase weight of current PnL in decision making (3x more than before)
reward += current_pnl * 0.3
# Volatility penalty
if volatility is not None:
reward -= abs(volatility) * 100
# Risk adjustment
if self.risk_aversion > 0 and len(self.returns) > 1:
returns_std = np.std(self.returns)
reward -= returns_std * self.risk_aversion
self.record_trade(action)
return reward