cleanup, cob ladder still broken

Dobromir Popov
2025-07-07 01:07:48 +03:00
parent 97d9bc97ee
commit 9101448e78
75 changed files with 546 additions and 12123 deletions


@@ -9,215 +9,166 @@ rewards for successful holding of positions.
import numpy as np
from datetime import datetime, timedelta
from collections import deque
import logging

logger = logging.getLogger(__name__)

class RewardCalculator:
    def __init__(self, base_fee_rate=0.001, reward_scaling=10.0, risk_aversion=0.1):
        self.base_fee_rate = base_fee_rate
        self.reward_scaling = reward_scaling
        self.risk_aversion = risk_aversion
        self.trade_pnls = []
        self.returns = []
        self.trade_timestamps = []
        self.frequency_threshold = 10  # Trades per minute threshold for penalty
        self.max_frequency_penalty = 0.05

    def record_pnl(self, pnl):
        """Record P&L for risk adjustment calculations"""
        self.trade_pnls.append(pnl)
        if len(self.trade_pnls) > 100:
            self.trade_pnls.pop(0)

    def record_trade(self, action):
        """Record trade action for frequency penalty calculations"""
        from time import time
        self.trade_timestamps.append(time())
        if len(self.trade_timestamps) > 100:
            self.trade_timestamps.pop(0)

    def _calculate_frequency_penalty(self):
        """Calculate penalty for high-frequency trading"""
        if len(self.trade_timestamps) < 2:
            return 0.0
        time_span = self.trade_timestamps[-1] - self.trade_timestamps[0]
        if time_span <= 0:
            return 0.0
        trades_per_minute = (len(self.trade_timestamps) / time_span) * 60
        if trades_per_minute > self.frequency_threshold:
            penalty = min(self.max_frequency_penalty, (trades_per_minute - self.frequency_threshold) * 0.001)
            return penalty
        return 0.0
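
    # Worked example (illustrative numbers, not from the original file): 20 trades
    # recorded over 60 seconds give trades_per_minute = 20, so the penalty is
    # min(0.05, (20 - 10) * 0.001) = 0.01 per action.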

    def _calculate_risk_adjustment(self, reward):
        """Adjust rewards based on risk (simple Sharpe ratio implementation)"""
        if len(self.trade_pnls) < 5:
            return reward  # Not enough data for adjustment
        pnl_array = np.array(self.trade_pnls)
        mean_return = np.mean(pnl_array)
        std_return = np.std(pnl_array)
        if std_return == 0:
            return reward  # Avoid division by zero
        sharpe = mean_return / std_return
        adjustment_factor = np.clip(1.0 + 0.5 * sharpe, 0.5, 2.0)
        return reward * adjustment_factor
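
    # Worked example (illustrative numbers, not from the original file): with mean
    # P&L 0.02 and standard deviation 0.01, sharpe = 2.0 and the adjustment factor
    # is clip(1.0 + 0.5 * 2.0, 0.5, 2.0) = 2.0, doubling the reward; a Sharpe of
    # -1.0 or lower halves it instead.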

    def _calculate_holding_reward(self, position_held_time, price_change):
        """Calculate reward for holding a position"""
        base_holding_reward = 0.0005 * (position_held_time / 60.0)
        if price_change > 0:
            return base_holding_reward * 2
        elif price_change < 0:
            return base_holding_reward * 0.5
        return base_holding_reward

    def calculate_basic_reward(self, pnl, confidence):
        """Calculate basic training reward based on P&L and confidence"""
        try:
            base_reward = pnl
            # Penalize confident losses harder than unconfident ones; boost confident wins
            if pnl < 0 and confidence > 0.7:
                confidence_adjustment = -confidence * 2
            elif pnl > 0 and confidence > 0.7:
                confidence_adjustment = confidence * 1.5
            else:
                confidence_adjustment = 0
            final_reward = base_reward + confidence_adjustment
            # Squash into (-1, 1) so rewards stay on a comparable scale
            normalized_reward = np.tanh(final_reward / 10.0)
            logger.debug(f"Basic reward calculation: P&L={pnl:.4f}, confidence={confidence:.2f}, reward={normalized_reward:.4f}")
            return float(normalized_reward)
        except Exception as e:
            logger.error(f"Error calculating basic reward: {e}")
            return 0.0
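
    # Worked example (illustrative numbers, not from the original file): pnl=1.0
    # with confidence=0.9 gives confidence_adjustment = 0.9 * 1.5 = 1.35,
    # final_reward = 2.35 and normalized_reward = tanh(0.235) ≈ 0.231.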

    def calculate_enhanced_reward(self, action, price_change, position_held_time=0,
                                  volatility=None, is_profitable=False, confidence=0.0,
                                  predicted_change=0.0, actual_change=0.0,
                                  current_pnl=0.0, symbol='UNKNOWN'):
        """Calculate enhanced reward for trading actions"""
        fee = self.base_fee_rate
        frequency_penalty = self._calculate_frequency_penalty()
        if action == 0:  # Buy: pay the transaction fee plus any frequency penalty
            reward = -fee - frequency_penalty
        elif action == 1:  # Sell: profit minus fees for both entry and exit
            profit_pct = price_change
            net_profit = profit_pct - (fee * 2)
            reward = net_profit * self.reward_scaling
            reward -= frequency_penalty
            # Record PnL for risk adjustment
            self.record_pnl(net_profit)
        else:  # Hold: small reward for a profitable position, small cost otherwise
            if is_profitable:
                reward = self._calculate_holding_reward(position_held_time, price_change)
            else:
                reward = -0.0001
        # Reward correct directional calls, penalize wrong ones
        if action in [0, 1] and predicted_change != 0:
            if (action == 0 and actual_change > 0) or (action == 1 and actual_change < 0):
                reward += abs(actual_change) * 5.0
            else:
                reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1
        if volatility is not None:
            reward -= abs(volatility) * 100
        if self.risk_aversion > 0 and len(self.returns) > 1:
            returns_std = np.std(self.returns)
            reward -= returns_std * self.risk_aversion
        self.record_trade(action)
        return reward
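
    # Worked example (illustrative numbers, not from the original file): a sell with
    # price_change=0.015 and base_fee_rate=0.001 gives net_profit = 0.015 - 0.002 = 0.013
    # and a base reward of 0.013 * 10.0 = 0.13 before the frequency and risk terms.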

    def calculate_prediction_reward(self, symbol, predicted_direction, actual_direction,
                                    confidence, predicted_change, actual_change,
                                    current_pnl=0.0, position_duration=0.0):
        """Calculate reward for prediction accuracy"""
        reward = 0.0
        if predicted_direction == actual_direction:
            reward += 1.0 * confidence
        else:
            reward -= 0.5
        if predicted_direction == actual_direction and abs(predicted_change) > 0.001:
            reward += abs(actual_change) * 5.0
        if predicted_direction != actual_direction and abs(predicted_change) > 0.001:
            reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1
        # Dynamic adjustment based on recent PnL (loss-cutting incentive)
        if hasattr(self, 'pnl_history') and symbol in self.pnl_history and self.pnl_history[symbol]:
            latest_pnl_entry = self.pnl_history[symbol][-1]
            latest_pnl_value = latest_pnl_entry.get('pnl', 0.0) if isinstance(latest_pnl_entry, dict) else 0.0
            if latest_pnl_value < 0 and position_duration > 60:
                reward -= abs(latest_pnl_value) * 0.2
            pnl_values = [entry.get('pnl', 0.0) for entry in self.pnl_history[symbol] if isinstance(entry, dict)]
            best_pnl = max(pnl_values) if pnl_values else 0.0
            if best_pnl < 0.0:
                reward -= 0.1
        return reward

# Example usage:
if __name__ == "__main__":
    # Create calculator instance
    reward_calc = RewardCalculator()

    # Example reward for a buy action
    buy_reward = reward_calc.calculate_enhanced_reward(action=0, price_change=0)
    print(f"Buy action reward: {buy_reward:.5f}")

    # Record a trade for frequency tracking
    reward_calc.record_trade(0)

    # Wait a bit and make another trade to test the frequency penalty
    import time
    time.sleep(0.1)

    # Example reward for a sell action with profit
    sell_reward = reward_calc.calculate_enhanced_reward(action=1, price_change=0.015, position_held_time=60)
    print(f"Sell action reward (with profit): {sell_reward:.5f}")

    # Example reward for a hold action on a profitable position
    hold_reward = reward_calc.calculate_enhanced_reward(action=2, price_change=0.01, position_held_time=30, is_profitable=True)
    print(f"Hold action reward (profitable): {hold_reward:.5f}")

    # Example reward for a hold action on an unprofitable position
    hold_reward_neg = reward_calc.calculate_enhanced_reward(action=2, price_change=-0.01, position_held_time=30, is_profitable=False)
    print(f"Hold action reward (unprofitable): {hold_reward_neg:.5f}")