commit 06fbbeb81e
parent 36d4c543c3
Author: Dobromir Popov
Date:   2025-06-24 20:07:44 +03:00

3 changed files with 1054 additions and 335 deletions

(Diff for one file suppressed because it is too large.)


@@ -23,7 +23,11 @@ import asyncio
 import json
 import logging
 import time
-import websockets
+try:
+    import websockets
+except ImportError:
+    # Fallback for environments where websockets is not available
+    websockets = None
 import numpy as np
 import pandas as pd
 from datetime import datetime, timedelta
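
The guarded import keeps the module importable when the optional websockets package is absent; the streaming code paths below then raise an explicit ImportError instead of failing on `None.connect`. A minimal sketch of the pattern in isolation (the `_require_websockets` helper is illustrative, not part of this commit):

    try:
        import websockets  # optional: only needed for live exchange streams
    except ImportError:
        # Module still imports; streaming entry points check for None and fail fast
        websockets = None

    def _require_websockets() -> None:
        # Hypothetical guard mirroring the inline checks added in this commit
        if websockets is None:
            raise ImportError("websockets module not available")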
@@ -106,7 +110,7 @@ class MultiExchangeCOBProvider:
     to create a consolidated view of market liquidity and pricing.
     """
 
-    def __init__(self, symbols: List[str] = None, bucket_size_bps: float = 1.0):
+    def __init__(self, symbols: Optional[List[str]] = None, bucket_size_bps: float = 1.0):
         """
         Initialize Multi-Exchange COB Provider
 
@@ -461,6 +465,8 @@ class MultiExchangeCOBProvider:
             ws_url = f"{config.websocket_url}{config.symbols_mapping[symbol].lower()}@depth20@100ms"
             logger.info(f"Connecting to Binance WebSocket: {ws_url}")
 
+            if websockets is None:
+                raise ImportError("websockets module not available")
             async with websockets.connect(ws_url) as websocket:
                 self.exchange_order_books[symbol]['binance']['connected'] = True
                 logger.info(f"Connected to Binance order book stream for {symbol}")
@@ -537,7 +543,7 @@ class MultiExchangeCOBProvider:
         except Exception as e:
             logger.error(f"Error adding trade to SVP: {e}")
 
-    def get_session_volume_profile(self, symbol: str, bucket_size: float = None) -> Dict:
+    def get_session_volume_profile(self, symbol: str, bucket_size: Optional[float] = None) -> Dict:
         """Get session volume profile for a symbol"""
         try:
             if bucket_size is None:
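
Both signature changes in this file are annotation fixes rather than behavior changes: under PEP 484, a parameter whose default is None must be typed Optional[...], or strict type checkers reject the default. A short self-contained illustration (the fallback value of 1.0 is hypothetical):

    from typing import Dict, Optional

    def get_session_volume_profile(symbol: str, bucket_size: Optional[float] = None) -> Dict:
        # None means "use the provider's configured bucket size"
        if bucket_size is None:
            bucket_size = 1.0  # illustrative default only
        return {"symbol": symbol, "bucket_size": bucket_size}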
@@ -690,6 +696,8 @@ class MultiExchangeCOBProvider:
             ws_url = f"{config.websocket_url}{config.symbols_mapping[symbol].lower()}@trade"
             logger.info(f"Connecting to Binance trade stream: {ws_url}")
 
+            if websockets is None:
+                raise ImportError("websockets module not available")
             async with websockets.connect(ws_url) as websocket:
                 logger.info(f"Connected to Binance trade stream for {symbol}")
 
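
Both Binance stream coroutines now run the same preflight check before connecting. A hedged sketch of the resulting flow, reusing the guarded module-level import and the standard websockets client API (the surrounding reconnect loop is omitted):

    async def stream_orderbook(ws_url: str) -> None:
        # Preflight added by this commit: fail with a clear error when the
        # optional dependency was stubbed out as None at import time
        if websockets is None:
            raise ImportError("websockets module not available")
        async with websockets.connect(ws_url) as websocket:
            async for message in websocket:
                ...  # parse depth updates or trades here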


@@ -301,6 +301,13 @@ class RealtimeRLCOBTrader:
             'last_inference_time': None
         }
 
+        # PnL tracking for loss cutting optimization
+        self.pnl_history: Dict[str, deque] = {
+            symbol: deque(maxlen=1000) for symbol in self.symbols
+        }
+        self.position_peak_pnl: Dict[str, float] = {symbol: 0.0 for symbol in self.symbols}
+        self.trade_history: Dict[str, List] = {symbol: [] for symbol in self.symbols}
+
         # Threading
         self.running = False
         self.inference_lock = Lock()
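
The deque with maxlen=1000 bounds memory per symbol while keeping recent history for the reward shaping below. A hedged sketch of how these structures might be fed on each mark-to-market tick (`update_position_pnl` is a hypothetical helper, not part of this commit):

    def update_position_pnl(self, symbol: str, current_pnl: float) -> None:
        # Bounded history: deque(maxlen=1000) evicts the oldest sample automatically
        self.pnl_history[symbol].append(current_pnl)
        # Track the best PnL seen on the open position, for drawdown measurement
        if current_pnl > self.position_peak_pnl[symbol]:
            self.position_peak_pnl[symbol] = current_pnl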
@@ -961,8 +968,10 @@ class RealtimeRLCOBTrader:
                           actual_direction: int,
                           confidence: float,
                           predicted_change: float,
-                          actual_change: float) -> float:
-        """Calculate reward for a prediction"""
+                          actual_change: float,
+                          current_pnl: float = 0.0,
+                          position_duration: float = 0.0) -> float:
+        """Calculate reward for a prediction with PnL-aware loss cutting optimization"""
         try:
             # Base reward for correct direction
             if predicted_direction == actual_direction:
@@ -983,7 +992,42 @@ class RealtimeRLCOBTrader:
             if base_reward < 0 and confidence > 0.8:
                 confidence_scaled_reward *= 1.5  # Increase penalty
 
-            return float(confidence_scaled_reward)
+            # === PnL-AWARE LOSS CUTTING REWARDS ===
+            pnl_reward = 0.0
+
+            # Reward cutting losses early (SIDEWAYS when losing)
+            if current_pnl < -10.0:  # In significant loss
+                if predicted_direction == 1:  # SIDEWAYS (exit signal)
+                    # Reward cutting losses before they get worse
+                    loss_cutting_bonus = min(1.0, abs(current_pnl) / 100.0) * confidence
+                    pnl_reward += loss_cutting_bonus
+                else:  # Continuing to trade while in loss
+                    # Penalty for not cutting losses
+                    pnl_reward -= 0.5 * confidence
+
+            # Reward protecting profits (SIDEWAYS when in profit and market turning)
+            elif current_pnl > 10.0:  # In profit
+                if predicted_direction == 1 and base_reward > 0:  # Correct SIDEWAYS prediction
+                    # Reward protecting profits from reversal
+                    profit_protection_bonus = min(0.5, current_pnl / 200.0) * confidence
+                    pnl_reward += profit_protection_bonus
+
+            # Duration penalty for holding losing positions
+            if current_pnl < 0 and position_duration > 3600:  # Losing for > 1 hour
+                duration_penalty = min(1.0, position_duration / 7200.0) * 0.3  # Up to 30% penalty
+                confidence_scaled_reward -= duration_penalty
+
+            # Severe penalty for letting small losses become big losses
+            if current_pnl < -50.0:  # Large loss
+                drawdown_penalty = min(2.0, abs(current_pnl) / 100.0) * confidence
+                confidence_scaled_reward -= drawdown_penalty
+
+            # Total reward
+            total_reward = confidence_scaled_reward + pnl_reward
+
+            # Clamp final reward
+            return max(-5.0, min(5.0, float(total_reward)))
+
         except Exception as e:
             logger.error(f"Error calculating reward: {e}")
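
To make the new terms concrete: with current_pnl = -40.0 and confidence = 0.7, a correct SIDEWAYS (exit) prediction earns a loss-cutting bonus of min(1.0, 40/100) * 0.7 = 0.28, while holding that losing position for two hours costs the full duration penalty of min(1.0, 7200/7200) * 0.3 = 0.3. A hedged usage sketch (it assumes predicted_direction is the method's first parameter, as the body implies; all values are illustrative):

    reward = trader._calculate_reward(
        predicted_direction=1,     # SIDEWAYS / exit signal
        actual_direction=1,
        confidence=0.7,
        predicted_change=0.0,
        actual_change=0.0002,
        current_pnl=-40.0,         # significant loss: loss-cutting bonus applies
        position_duration=7200.0,  # two hours: full duration penalty applies
    )
    assert -5.0 <= reward <= 5.0   # the commit clamps rewards to [-5, 5]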