Improve trade reward shaping (add min-profit-after-fees deadzone); fix TZ handling at last

This commit is contained in:
Dobromir Popov
2025-08-08 01:53:17 +03:00
parent ded7e7f008
commit b80e1c1eba
3 changed files with 72 additions and 16 deletions

View File

@ -20,8 +20,10 @@ class TradingEnvironment(gym.Env):
window_size: int = 20,
risk_aversion: float = 0.2, # Controls how much to penalize volatility
price_scaling: str = 'zscore', # 'zscore', 'minmax', or 'raw'
reward_scaling: float = 10.0, # Scale factor for rewards
episode_penalty: float = 0.1): # Penalty for active positions at end of episode
reward_scaling: float = 10.0, # Scale factor for rewards
episode_penalty: float = 0.1, # Penalty for active positions at end of episode
min_profit_after_fees: float = 0.0005 # Deadzone: require >= 5 bps beyond fees
):
super(TradingEnvironment, self).__init__()
self.data = data
@ -33,6 +35,7 @@ class TradingEnvironment(gym.Env):
self.price_scaling = price_scaling
self.reward_scaling = reward_scaling
self.episode_penalty = episode_penalty
self.min_profit_after_fees = max(0.0, float(min_profit_after_fees))
# Preprocess data if needed
self._preprocess_data()
@ -177,8 +180,14 @@ class TradingEnvironment(gym.Env):
price_diff = current_price - self.entry_price
pnl = price_diff / self.entry_price - 2 * self.fee_rate # Account for entry and exit fees
# Adjust reward based on PnL and risk
reward = pnl * self.reward_scaling
# Deadzone to discourage micro profits
if pnl > 0 and pnl < self.min_profit_after_fees:
reward = -self.fee_rate
elif pnl < 0 and abs(pnl) < self.min_profit_after_fees:
reward = pnl * self.reward_scaling * 0.5
else:
effective_pnl = pnl - (self.min_profit_after_fees if pnl > 0 else 0.0)
reward = effective_pnl * self.reward_scaling
# Track trade performance
self.total_trades += 1
@ -212,8 +221,12 @@ class TradingEnvironment(gym.Env):
price_diff = current_price - self.entry_price
unrealized_pnl = price_diff / self.entry_price
# Small reward/penalty based on unrealized P&L
reward = unrealized_pnl * 0.05 # Scale down to encourage holding good positions
# Encourage holding only if unrealized edge exceeds deadzone
unrealized_edge = unrealized_pnl
if abs(unrealized_edge) >= self.min_profit_after_fees:
reward = unrealized_edge * (self.reward_scaling * 0.2)
else:
reward = -0.0002
elif self.position < 0: # Short position
if action == 0: # BUY (close short)
@ -221,8 +234,13 @@ class TradingEnvironment(gym.Env):
price_diff = self.entry_price - current_price
pnl = price_diff / self.entry_price - 2 * self.fee_rate # Account for entry and exit fees
# Adjust reward based on PnL and risk
reward = pnl * self.reward_scaling
if pnl > 0 and pnl < self.min_profit_after_fees:
reward = -self.fee_rate
elif pnl < 0 and abs(pnl) < self.min_profit_after_fees:
reward = pnl * self.reward_scaling * 0.5
else:
effective_pnl = pnl - (self.min_profit_after_fees if pnl > 0 else 0.0)
reward = effective_pnl * self.reward_scaling
# Track trade performance
self.total_trades += 1
@ -256,8 +274,12 @@ class TradingEnvironment(gym.Env):
price_diff = self.entry_price - current_price
unrealized_pnl = price_diff / self.entry_price
# Small reward/penalty based on unrealized P&L
reward = unrealized_pnl * 0.05 # Scale down to encourage holding good positions
# Encourage holding only if unrealized edge exceeds deadzone
unrealized_edge = unrealized_pnl
if abs(unrealized_edge) >= self.min_profit_after_fees:
reward = unrealized_edge * (self.reward_scaling * 0.2)
else:
reward = -0.0002
# Record the action
self.actions_taken.append(action)