better rewards, fixed TZ at last
This commit is contained in:
@@ -20,8 +20,10 @@ class TradingEnvironment(gym.Env):
|
||||
window_size: int = 20,
|
||||
risk_aversion: float = 0.2, # Controls how much to penalize volatility
|
||||
price_scaling: str = 'zscore', # 'zscore', 'minmax', or 'raw'
|
||||
reward_scaling: float = 10.0, # Scale factor for rewards
|
||||
episode_penalty: float = 0.1): # Penalty for active positions at end of episode
|
||||
reward_scaling: float = 10.0, # Scale factor for rewards
|
||||
episode_penalty: float = 0.1, # Penalty for active positions at end of episode
|
||||
min_profit_after_fees: float = 0.0005 # Deadzone: require >= 5 bps beyond fees
|
||||
):
|
||||
super(TradingEnvironment, self).__init__()
|
||||
|
||||
self.data = data
|
||||
@@ -33,6 +35,7 @@ class TradingEnvironment(gym.Env):
|
||||
self.price_scaling = price_scaling
|
||||
self.reward_scaling = reward_scaling
|
||||
self.episode_penalty = episode_penalty
|
||||
self.min_profit_after_fees = max(0.0, float(min_profit_after_fees))
|
||||
|
||||
# Preprocess data if needed
|
||||
self._preprocess_data()
|
||||
@@ -177,8 +180,14 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = current_price - self.entry_price
|
||||
pnl = price_diff / self.entry_price - 2 * self.fee_rate # Account for entry and exit fees
|
||||
|
||||
# Adjust reward based on PnL and risk
|
||||
reward = pnl * self.reward_scaling
|
||||
# Deadzone to discourage micro profits
|
||||
if pnl > 0 and pnl < self.min_profit_after_fees:
|
||||
reward = -self.fee_rate
|
||||
elif pnl < 0 and abs(pnl) < self.min_profit_after_fees:
|
||||
reward = pnl * self.reward_scaling * 0.5
|
||||
else:
|
||||
effective_pnl = pnl - (self.min_profit_after_fees if pnl > 0 else 0.0)
|
||||
reward = effective_pnl * self.reward_scaling
|
||||
|
||||
# Track trade performance
|
||||
self.total_trades += 1
|
||||
@@ -212,8 +221,12 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = current_price - self.entry_price
|
||||
unrealized_pnl = price_diff / self.entry_price
|
||||
|
||||
# Small reward/penalty based on unrealized P&L
|
||||
reward = unrealized_pnl * 0.05 # Scale down to encourage holding good positions
|
||||
# Encourage holding only if unrealized edge exceeds deadzone
|
||||
unrealized_edge = unrealized_pnl
|
||||
if abs(unrealized_edge) >= self.min_profit_after_fees:
|
||||
reward = unrealized_edge * (self.reward_scaling * 0.2)
|
||||
else:
|
||||
reward = -0.0002
|
||||
|
||||
elif self.position < 0: # Short position
|
||||
if action == 0: # BUY (close short)
|
||||
@@ -221,8 +234,13 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = self.entry_price - current_price
|
||||
pnl = price_diff / self.entry_price - 2 * self.fee_rate # Account for entry and exit fees
|
||||
|
||||
# Adjust reward based on PnL and risk
|
||||
reward = pnl * self.reward_scaling
|
||||
if pnl > 0 and pnl < self.min_profit_after_fees:
|
||||
reward = -self.fee_rate
|
||||
elif pnl < 0 and abs(pnl) < self.min_profit_after_fees:
|
||||
reward = pnl * self.reward_scaling * 0.5
|
||||
else:
|
||||
effective_pnl = pnl - (self.min_profit_after_fees if pnl > 0 else 0.0)
|
||||
reward = effective_pnl * self.reward_scaling
|
||||
|
||||
# Track trade performance
|
||||
self.total_trades += 1
|
||||
@@ -256,8 +274,12 @@ class TradingEnvironment(gym.Env):
|
||||
price_diff = self.entry_price - current_price
|
||||
unrealized_pnl = price_diff / self.entry_price
|
||||
|
||||
# Small reward/penalty based on unrealized P&L
|
||||
reward = unrealized_pnl * 0.05 # Scale down to encourage holding good positions
|
||||
# Encourage holding only if unrealized edge exceeds deadzone
|
||||
unrealized_edge = unrealized_pnl
|
||||
if abs(unrealized_edge) >= self.min_profit_after_fees:
|
||||
reward = unrealized_edge * (self.reward_scaling * 0.2)
|
||||
else:
|
||||
reward = -0.0002
|
||||
|
||||
# Record the action
|
||||
self.actions_taken.append(action)
|
||||
|
Reference in New Issue
Block a user