""" Improved Reward Function for RL Trading Agent This module provides a more sophisticated reward function for the RL trading agent that incorporates realistic trading fees, penalties for excessive trading, and rewards for successful holding of positions. """ import numpy as np from datetime import datetime, timedelta from collections import deque class ImprovedRewardCalculator: def __init__(self, max_drawdown_pct=0.1, # Maximum drawdown % risk_reward_ratio=1.5, # Risk-reward ratio base_fee_rate=0.0002, # 0.02% per transaction max_frequency_penalty=0.005, # Maximum 0.5% penalty for frequent trading holding_reward_rate=0.0001, # Small reward for holding profitable positions risk_adjusted=True, # Use Sharpe ratio for risk adjustment base_reward=1.0, # Base reward scale profit_factor=2.0, # Profit reward multiplier loss_factor=1.0, # Loss penalty multiplier trade_frequency_penalty=0.3, # Penalty for frequent trading position_duration_factor=0.05 # Reward for longer positions ): self.base_fee_rate = base_fee_rate self.max_frequency_penalty = max_frequency_penalty self.holding_reward_rate = holding_reward_rate self.risk_adjusted = risk_adjusted # New parameters self.base_reward = base_reward self.profit_factor = profit_factor self.loss_factor = loss_factor self.trade_frequency_penalty = trade_frequency_penalty self.position_duration_factor = position_duration_factor # Keep track of recent trades self.recent_trades = deque(maxlen=1000) self.trade_pnls = deque(maxlen=100) # For risk adjustment # Additional tracking metrics self.total_trades = 0 self.profitable_trades = 0 self.total_pnl = 0.0 self.daily_pnl = {} self.hourly_pnl = {} def record_trade(self, timestamp=None, action=None, price=None): """Record a trade for frequency tracking""" if timestamp is None: timestamp = datetime.now() self.recent_trades.append({ 'timestamp': timestamp, 'action': action, 'price': price }) def record_pnl(self, pnl): """Record a PnL result for risk adjustment and tracking metrics""" self.trade_pnls.append(pnl) # Update overall metrics self.total_trades += 1 self.total_pnl += pnl if pnl > 0: self.profitable_trades += 1 # Track daily and hourly PnL now = datetime.now() day_key = now.strftime('%Y-%m-%d') hour_key = now.strftime('%Y-%m-%d %H:00') # Update daily PnL if day_key not in self.daily_pnl: self.daily_pnl[day_key] = 0.0 self.daily_pnl[day_key] += pnl # Update hourly PnL if hour_key not in self.hourly_pnl: self.hourly_pnl[hour_key] = 0.0 self.hourly_pnl[hour_key] += pnl def _calculate_frequency_penalty(self): """Calculate penalty for trading too frequently""" if len(self.recent_trades) < 2: return 0.0 # Count trades in the last minute now = datetime.now() one_minute_ago = now - timedelta(minutes=1) trades_last_minute = sum(1 for trade in self.recent_trades if trade['timestamp'] > one_minute_ago) # Apply progressive penalty (more severe as frequency increases) if trades_last_minute <= 1: return 0.0 # No penalty for normal trading rate # Progressive penalty based on trade frequency penalty = min(self.max_frequency_penalty, self.base_fee_rate * trades_last_minute) return penalty def _calculate_holding_reward(self, position_held_time, price_change_pct): """Calculate reward for holding a position for some time""" if position_held_time <= 0 or price_change_pct <= 0: return 0.0 # No reward for unprofitable holds # Cap at 100 time units (seconds, minutes, etc.) 
capped_time = min(position_held_time, 100) # Scale reward by both time and price change reward = self.holding_reward_rate * capped_time * price_change_pct return reward def _calculate_risk_adjustment(self, reward): """Adjust rewards based on risk (simple Sharpe ratio implementation)""" if len(self.trade_pnls) < 5: return reward # Not enough data for adjustment # Calculate mean and standard deviation of returns pnl_array = np.array(self.trade_pnls) mean_return = np.mean(pnl_array) std_return = np.std(pnl_array) if std_return == 0: return reward # Avoid division by zero # Simplified Sharpe ratio sharpe = mean_return / std_return # Scale reward by Sharpe ratio (normalized to be around 1.0) adjustment_factor = np.clip(1.0 + 0.5 * sharpe, 0.5, 2.0) return reward * adjustment_factor def calculate_reward(self, action, price_change, position_held_time=0, volatility=None, is_profitable=False): """ Calculate the improved reward Args: action (int): 0 = Buy, 1 = Sell, 2 = Hold price_change (float): Percent price change for the trade position_held_time (int): Time position was held (in time units) volatility (float, optional): Market volatility measure is_profitable (bool): Whether current position is profitable Returns: float: Calculated reward value """ # Calculate trading fee fee = self.base_fee_rate # Calculate frequency penalty frequency_penalty = self._calculate_frequency_penalty() # Base reward calculation if action == 0: # Buy # Small penalty for transaction plus frequency penalty reward = -fee - frequency_penalty elif action == 1: # Sell # Calculate profit percentage minus fees (both entry and exit) profit_pct = price_change net_profit = profit_pct - (fee * 2) # Scale reward and apply frequency penalty reward = net_profit * 10 # Scale reward reward -= frequency_penalty # Record PnL for risk adjustment self.record_pnl(net_profit) else: # Hold # Small reward for holding a profitable position, small cost otherwise if is_profitable: reward = self._calculate_holding_reward(position_held_time, price_change) else: reward = -0.0001 # Very small negative reward # Apply risk adjustment if enabled if self.risk_adjusted: reward = self._calculate_risk_adjustment(reward) # Record this action for future frequency calculations self.record_trade(action=action) return reward # Example usage: if __name__ == "__main__": # Create calculator instance reward_calc = ImprovedRewardCalculator() # Example reward for a buy action buy_reward = reward_calc.calculate_reward(action=0, price_change=0) print(f"Buy action reward: {buy_reward:.5f}") # Record a trade for frequency tracking reward_calc.record_trade(action=0) # Wait a bit and make another trade to test frequency penalty import time time.sleep(0.1) # Example reward for a sell action with profit sell_reward = reward_calc.calculate_reward(action=1, price_change=0.015, position_held_time=60) print(f"Sell action reward (with profit): {sell_reward:.5f}") # Example reward for a hold action on profitable position hold_reward = reward_calc.calculate_reward(action=2, price_change=0.01, position_held_time=30, is_profitable=True) print(f"Hold action reward (profitable): {hold_reward:.5f}") # Example reward for a hold action on unprofitable position hold_reward_neg = reward_calc.calculate_reward(action=2, price_change=-0.01, position_held_time=30, is_profitable=False) print(f"Hold action reward (unprofitable): {hold_reward_neg:.5f}")
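

# ---------------------------------------------------------------------------
# Sketch: one possible way to wire the calculator into a gym-style step loop.
# Everything below is illustrative only -- `_SketchTradingEnv`, its `prices`
# sequence, and the entry-price bookkeeping are hypothetical assumptions for
# this sketch, not part of the module's API.
# ---------------------------------------------------------------------------
class _SketchTradingEnv:
    """Minimal long-only environment showing where calculate_reward() fits."""

    def __init__(self, prices):
        self.prices = list(prices)              # sequence of mid prices
        self.reward_calc = ImprovedRewardCalculator()
        self.t = 0                              # current time index
        self.entry_price = None                 # price when the position opened
        self.entry_t = None                     # index when the position opened

    def step(self, action):
        price = self.prices[self.t]

        if self.entry_price is not None:
            # Unrealized fractional move and holding time of the open position
            price_change = (price - self.entry_price) / self.entry_price
            held = self.t - self.entry_t
        else:
            price_change, held = 0.0, 0

        if action == 0 and self.entry_price is None:        # Buy: open
            self.entry_price, self.entry_t = price, self.t
        elif action == 1 and self.entry_price is not None:  # Sell: close
            self.entry_price, self.entry_t = None, None

        reward = self.reward_calc.calculate_reward(
            action=action,
            price_change=price_change,
            position_held_time=held,
            is_profitable=price_change > 0,
        )

        self.t += 1
        return reward, self.t >= len(self.prices)           # (reward, done)

# Usage would look roughly like:
#   env = _SketchTradingEnv([100.0, 100.5, 101.0])
#   r, done = env.step(0)   # buy
#   r, done = env.step(2)   # hold
#   r, done = env.step(1)   # sell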