artificially double fees to promote more profitable trades
@@ -91,33 +91,79 @@ class RewardCalculator:
        return 0.0

    def calculate_enhanced_reward(self, action, price_change, position_held_time=0, volatility=None, is_profitable=False, confidence=0.0, predicted_change=0.0, actual_change=0.0, current_pnl=0.0, symbol='UNKNOWN'):
        """Calculate enhanced reward for trading actions"""
        """Calculate enhanced reward for trading actions with shifted neutral point

        Neutral reward is shifted to require profits that exceed double the fees,
        which penalizes small profit trades and encourages holding for larger moves.
        Current PnL is given more weight in the decision-making process.
        """
        fee = self.base_fee_rate
        double_fee = fee * 4  # Double the fees (2x open + 2x close = 4x base fee)
        frequency_penalty = self._calculate_frequency_penalty()

        if action == 0:  # Buy
            # Penalize buying more when already in profit
            reward = -fee - frequency_penalty
            if current_pnl > 0:
                # Reduce incentive to close profitable positions
                reward -= current_pnl * 0.2
        elif action == 1:  # Sell
            profit_pct = price_change
            net_profit = profit_pct - (fee * 2)
            reward = net_profit * self.reward_scaling

            # Shift neutral point - require profit > double fees to be considered positive
            net_profit = profit_pct - double_fee

            # Scale reward based on profit size
            if net_profit > 0:
                # Exponential reward for larger profits
                reward = (net_profit ** 1.5) * self.reward_scaling
            else:
                # Linear penalty for losses
                reward = net_profit * self.reward_scaling

            reward -= frequency_penalty
            self.record_pnl(net_profit)

            # Add extra penalty for very small profits (less than 3x fees)
            if 0 < profit_pct < (fee * 6):
                reward -= 0.5  # Discourage tiny profit-taking
        else:  # Hold
            if is_profitable:
                reward = self._calculate_holding_reward(position_held_time, price_change)
                # Increase reward for holding profitable positions
                profit_factor = min(5.0, current_pnl * 20)  # Cap at 5x
                reward = self._calculate_holding_reward(position_held_time, price_change) * (1.0 + profit_factor)

                # Add bonus for holding through volatility when profitable
                if volatility is not None and volatility > 0.001:
                    reward += 0.1 * volatility * 100
            else:
                reward = -0.0001
                # Small penalty for holding losing positions
                loss_factor = min(1.0, abs(current_pnl) * 10)
                reward = -0.0001 * (1.0 + loss_factor)

                # But reduce penalty for very recent positions (give them time)
                if position_held_time < 30:  # Less than 30 seconds
                    reward *= 0.5

        # Prediction accuracy reward component
        if action in [0, 1] and predicted_change != 0:
            if (action == 0 and actual_change > 0) or (action == 1 and actual_change < 0):
                reward += abs(actual_change) * 5.0
            else:
                reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1

        # Increase weight of current PnL in decision making (3x more than before)
        reward += current_pnl * 0.3

        # Volatility penalty
        if volatility is not None:
            reward -= abs(volatility) * 100

        # Risk adjustment
        if self.risk_aversion > 0 and len(self.returns) > 1:
            returns_std = np.std(self.returns)
            reward -= returns_std * self.risk_aversion

        self.record_trade(action)
        return reward
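To make the shifted neutral point concrete, here is a minimal sketch (not part of the commit) that replays just the sell-branch arithmetic above, assuming base_fee_rate = 0.001 and reward_scaling = 1.0; both attributes appear in the diff, but the values are illustrative guesses, and the frequency-penalty and PnL terms of the full method are omitted.

# Minimal sketch, not part of the commit: assumed fee and scaling values.
fee = 0.001            # assumed base_fee_rate
reward_scaling = 1.0   # assumed reward_scaling
double_fee = fee * 4   # doubled round-trip fees, as in the diff

for profit_pct in (0.002, 0.005, 0.02):
    net_profit = profit_pct - double_fee
    if net_profit > 0:
        reward = (net_profit ** 1.5) * reward_scaling  # exponential reward for larger wins
    else:
        reward = net_profit * reward_scaling           # linear penalty otherwise
    if 0 < profit_pct < fee * 6:
        reward -= 0.5                                  # extra penalty for tiny profit-taking
    print(f"profit={profit_pct:.3f}  net={net_profit:+.4f}  reward={reward:+.5f}")

Under these assumptions a 0.2% or 0.5% move still ends with a negative reward, because it either falls below the doubled fee threshold or lands inside the small-profit band, while the 2% move clears both and receives the exponentially scaled positive reward.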