From 398aca32ad5c68ee13020206bb2dd988b8c22d12 Mon Sep 17 00:00:00 2001
From: Dobromir Popov <dobromir.popov@gateway.one>
Date: Wed, 28 May 2025 14:47:39 +0300
Subject: [PATCH] chart fix. RL trainging re-implemented

---
 closed_trades_history.json |  65 +++++-
 web/dashboard.py           | 405 +++++++++++++++++++++++++++++++------
 2 files changed, 402 insertions(+), 68 deletions(-)

diff --git a/closed_trades_history.json b/closed_trades_history.json
index 1c7fd4c..cc8017e 100644
--- a/closed_trades_history.json
+++ b/closed_trades_history.json
@@ -1,17 +1,62 @@
 [
   {
     "trade_id": 1,
-    "side": "SHORT",
-    "entry_time": "2025-05-28T08:15:12.599216+00:00",
-    "exit_time": "2025-05-28T08:15:56.366340+00:00",
-    "entry_price": 2632.21,
-    "exit_price": 2631.51,
-    "size": 0.003043,
-    "gross_pnl": 0.0021300999999994464,
+    "side": "LONG",
+    "entry_time": "2025-05-28T11:43:13.550522+00:00",
+    "exit_time": "2025-05-28T11:43:44.025990+00:00",
+    "entry_price": 2652.59,
+    "exit_price": 2651.9,
+    "size": 0.003343,
+    "gross_pnl": -0.0023066700000001824,
     "fees": 0.0,
-    "net_pnl": 0.0021300999999994464,
-    "duration": "0:00:43.767124",
+    "net_pnl": -0.0023066700000001824,
+    "duration": "0:00:30.475468",
     "symbol": "ETH/USDC",
-    "mexc_executed": true
+    "mexc_executed": false
+  },
+  {
+    "trade_id": 2,
+    "side": "SHORT",
+    "entry_time": "2025-05-28T11:43:44.025990+00:00",
+    "exit_time": "2025-05-28T11:44:14.341821+00:00",
+    "entry_price": 2651.9,
+    "exit_price": 2651.09,
+    "size": 0.003136,
+    "gross_pnl": 0.0025401599999998288,
+    "fees": 0.0,
+    "net_pnl": 0.0025401599999998288,
+    "duration": "0:00:30.315831",
+    "symbol": "ETH/USDC",
+    "mexc_executed": false
+  },
+  {
+    "trade_id": 3,
+    "side": "LONG",
+    "entry_time": "2025-05-28T11:46:26.737826+00:00",
+    "exit_time": "2025-05-28T11:46:42.810205+00:00",
+    "entry_price": 2651.89,
+    "exit_price": 2651.03,
+    "size": 0.003551,
+    "gross_pnl": -0.003053859999998837,
+    "fees": 0.0,
+    "net_pnl": -0.003053859999998837,
+    "duration": "0:00:16.072379",
+    "symbol": "ETH/USDC",
+    "mexc_executed": false
+  },
+  {
+    "trade_id": 4,
+    "side": "SHORT",
+    "entry_time": "2025-05-28T11:46:42.810205+00:00",
+    "exit_time": "2025-05-28T11:47:12.016524+00:00",
+    "entry_price": 2651.03,
+    "exit_price": 2651.49,
+    "size": 0.002849,
+    "gross_pnl": -0.001310539999998808,
+    "fees": 0.0,
+    "net_pnl": -0.001310539999998808,
+    "duration": "0:00:29.206319",
+    "symbol": "ETH/USDC",
+    "mexc_executed": false
   }
 ]
\ No newline at end of file
diff --git a/web/dashboard.py b/web/dashboard.py
index b828c42..45f6d14 100644
--- a/web/dashboard.py
+++ b/web/dashboard.py
@@ -125,6 +125,18 @@ class TradingDashboard:
         # Load available models for real trading
         self._load_available_models()
         
+        # RL Training System - Train on closed trades
+        self.rl_training_enabled = True
+        self.rl_training_stats = {
+            'total_training_episodes': 0,
+            'profitable_trades_trained': 0,
+            'unprofitable_trades_trained': 0,
+            'last_training_time': None,
+            'training_rewards': deque(maxlen=100),  # Last 100 training rewards
+            'model_accuracy_trend': deque(maxlen=50)  # Track accuracy over time
+        }
+        self.rl_training_queue = deque(maxlen=1000)  # Queue of trades to train on
+        
         # Create Dash app
         self.app = dash.Dash(__name__, external_stylesheets=[
             'https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css',
@@ -1491,6 +1503,9 @@ class TradingDashboard:
                     # Save to file for persistence
                     self._save_closed_trades_to_file()
                     
+                    # Trigger RL training on this closed trade
+                    self._trigger_rl_training_on_closed_trade(closed_trade)
+                    
                     logger.info(f"[TRADE] CLOSED SHORT: {size} @ ${exit_price:.2f} | PnL: ${net_pnl:.2f} | OPENING LONG")
                     
                     # Clear position before opening new one
@@ -1564,6 +1579,9 @@ class TradingDashboard:
                     # Save to file for persistence
                     self._save_closed_trades_to_file()
                     
+                    # Trigger RL training on this closed trade
+                    self._trigger_rl_training_on_closed_trade(closed_trade)
+                    
                     logger.info(f"[TRADE] CLOSED SHORT: {size} @ ${exit_price:.2f} | PnL: ${net_pnl:.2f} | OPENING LONG")
                     
                     # Clear position before opening new one
@@ -2668,6 +2686,40 @@ class TradingDashboard:
                 status['rl']['status'] = 'TRAINING'
                 status['rl']['status_color'] = 'success'
             
+            # Add our real-time RL training statistics
+            if hasattr(self, 'rl_training_stats') and self.rl_training_stats:
+                rl_stats = self.rl_training_stats
+                total_episodes = rl_stats.get('total_training_episodes', 0)
+                profitable_trades = rl_stats.get('profitable_trades_trained', 0)
+                
+                # Calculate win rate from our training data
+                if total_episodes > 0:
+                    win_rate = profitable_trades / total_episodes
+                    status['rl']['win_rate'] = win_rate
+                    status['rl']['episodes'] = total_episodes
+                    
+                    # Update status based on training activity
+                    if rl_stats.get('last_training_time'):
+                        last_training = rl_stats['last_training_time']
+                        time_since_training = (datetime.now() - last_training).total_seconds()
+                        
+                        if time_since_training < 300:  # Last 5 minutes
+                            status['rl']['status'] = 'REALTIME_TRAINING'
+                            status['rl']['status_color'] = 'success'
+                        elif time_since_training < 3600:  # Last hour
+                            status['rl']['status'] = 'ACTIVE'
+                            status['rl']['status_color'] = 'info'
+                        else:
+                            status['rl']['status'] = 'IDLE'
+                            status['rl']['status_color'] = 'warning'
+                    
+                    # Average reward from recent training
+                    if rl_stats.get('training_rewards'):
+                        avg_reward = sum(rl_stats['training_rewards']) / len(rl_stats['training_rewards'])
+                        status['rl']['avg_reward'] = avg_reward
+                    
+                    logger.debug(f"Updated RL status with real-time stats: {total_episodes} episodes, {win_rate:.1%} win rate")
+            
             return status
             
         except Exception as e:
@@ -3268,65 +3320,302 @@ class TradingDashboard:
             logger.info("Continuous training stopped")
         except Exception as e:
             logger.error(f"Error stopping continuous training: {e}")
-# Convenience function for integration
-def create_dashboard(data_provider: DataProvider = None, orchestrator: TradingOrchestrator = None, trading_executor: TradingExecutor = None) -> TradingDashboard:
-    """Create and return a trading dashboard instance"""
-    return TradingDashboard(data_provider, orchestrator, trading_executor)
 
-if __name__ == "__main__":
-    """Main entry point for running the dashboard with MEXC integration"""
-    import logging
+    def _trigger_rl_training_on_closed_trade(self, closed_trade):
+        """Trigger RL training based on a closed trade's profitability"""
+        try:
+            if not self.rl_training_enabled:
+                return
+            
+            # Extract trade information
+            net_pnl = closed_trade.get('net_pnl', 0)
+            is_profitable = net_pnl > 0
+            trade_duration = closed_trade.get('duration', timedelta(0))
+            
+            # Create training episode data
+            training_episode = {
+                'trade_id': closed_trade.get('trade_id'),
+                'side': closed_trade.get('side'),
+                'entry_price': closed_trade.get('entry_price'),
+                'exit_price': closed_trade.get('exit_price'),
+                'net_pnl': net_pnl,
+                'is_profitable': is_profitable,
+                'duration_seconds': trade_duration.total_seconds(),
+                'symbol': closed_trade.get('symbol', 'ETH/USDT'),
+                'timestamp': closed_trade.get('exit_time', datetime.now()),
+                'reward': self._calculate_rl_reward(closed_trade)
+            }
+            
+            # Add to training queue
+            self.rl_training_queue.append(training_episode)
+            
+            # Update training statistics
+            self.rl_training_stats['total_training_episodes'] += 1
+            if is_profitable:
+                self.rl_training_stats['profitable_trades_trained'] += 1
+            else:
+                self.rl_training_stats['unprofitable_trades_trained'] += 1
+            
+            self.rl_training_stats['last_training_time'] = datetime.now()
+            self.rl_training_stats['training_rewards'].append(training_episode['reward'])
+            
+            # Trigger actual RL model training
+            self._execute_rl_training_step(training_episode)
+            
+            logger.info(f"[RL_TRAINING] Trade #{training_episode['trade_id']} added to training: "
+                       f"{'PROFITABLE' if is_profitable else 'LOSS'} "
+                       f"PnL: ${net_pnl:.2f}, Reward: {training_episode['reward']:.3f}")
+            
+        except Exception as e:
+            logger.error(f"Error in RL training trigger: {e}")
     
-    # Setup logging
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-    )
+    def _calculate_rl_reward(self, closed_trade):
+        """Calculate reward for RL training based on trade performance"""
+        try:
+            net_pnl = closed_trade.get('net_pnl', 0)
+            duration = closed_trade.get('duration', timedelta(0))
+            duration_hours = max(duration.total_seconds() / 3600, 0.01)  # Avoid division by zero
+            
+            # Base reward is normalized PnL
+            base_reward = net_pnl / 10.0  # Normalize to reasonable range
+            
+            # Time efficiency bonus/penalty
+            # Reward faster profitable trades, penalize slow losses
+            if net_pnl > 0:
+                # Profitable trades: bonus for speed
+                time_factor = min(2.0, 1.0 / duration_hours)  # Max 2x bonus for very fast trades
+                reward = base_reward * time_factor
+            else:
+                # Losing trades: penalty increases with time
+                time_penalty = min(2.0, duration_hours / 24.0)  # Max 2x penalty for very slow trades
+                reward = base_reward * (1 + time_penalty)
+            
+            # Clip reward to reasonable range
+            reward = max(-5.0, min(5.0, reward))
+            
+            return reward
+            
+        except Exception as e:
+            logger.warning(f"Error calculating RL reward: {e}")
+            return 0.0
     
-    logger.info("="*60)
-    logger.info("STARTING ENHANCED TRADING DASHBOARD WITH MEXC INTEGRATION")
-    logger.info("="*60)
+    def _execute_rl_training_step(self, training_episode):
+        """Execute a single RL training step with the trade data"""
+        try:
+            # Get market data around the trade time
+            symbol = training_episode['symbol']
+            trade_time = training_episode['timestamp']
+            
+            # Get historical data for the training context
+            # Look back 1 hour before the trade for context
+            lookback_data = self._get_training_context_data(symbol, trade_time, lookback_minutes=60)
+            
+            if lookback_data is None or lookback_data.empty:
+                logger.warning(f"[RL_TRAINING] No context data available for trade #{training_episode['trade_id']}")
+                return False
+            
+            # Prepare state representation
+            state = self._prepare_rl_state(lookback_data, training_episode)
+            
+            # Prepare action (what the model decided)
+            action = 1 if training_episode['side'] == 'LONG' else 0  # 1 = BUY/LONG, 0 = SELL/SHORT
+            
+            # Get reward
+            reward = training_episode['reward']
+            
+            # Send training data to RL models
+            training_success = self._send_rl_training_step(state, action, reward, training_episode)
+            
+            if training_success:
+                logger.debug(f"[RL_TRAINING] Successfully trained on trade #{training_episode['trade_id']}")
+                
+                # Update model accuracy trend
+                accuracy = self._estimate_model_accuracy()
+                self.rl_training_stats['model_accuracy_trend'].append(accuracy)
+                
+                return True
+            else:
+                logger.warning(f"[RL_TRAINING] Failed to train on trade #{training_episode['trade_id']}")
+                return False
+                
+        except Exception as e:
+            logger.error(f"Error executing RL training step: {e}")
+            return False
     
-    try:
-        # Initialize components
-        logger.info("Initializing DataProvider...")
-        data_provider = DataProvider()
-        
-        logger.info("Initializing TradingOrchestrator...")
-        orchestrator = TradingOrchestrator(data_provider)
-        
-        logger.info("Initializing TradingExecutor (MEXC)...")
-        trading_executor = TradingExecutor()
-        
-        # Log MEXC status
-        if trading_executor.trading_enabled:
-            logger.info("MEXC: LIVE TRADING ENABLED")
-        elif trading_executor.dry_run:
-            logger.info("MEXC: DRY RUN MODE ENABLED")
-        else:
-            logger.info("MEXC: OFFLINE MODE")
-        
-        logger.info("Creating dashboard with all components...")
-        dashboard = create_dashboard(
-            data_provider=data_provider,
-            orchestrator=orchestrator,
-            trading_executor=trading_executor
-        )
-        
-        logger.info("Dashboard Features:")
-        logger.info("  - Real-time price charts with WebSocket streaming")
-        logger.info("  - AI model performance monitoring")
-        logger.info("  - MEXC trading integration")
-        logger.info("  - Session-based P&L tracking")
-        logger.info("  - Memory usage monitoring")
-        logger.info("  - Continuous model training")
-        
-        # Run dashboard
-        logger.info("Starting dashboard server on http://127.0.0.1:8050")
-        dashboard.run(host='127.0.0.1', port=8050, debug=False)
-        
-    except KeyboardInterrupt:
-        logger.info("Dashboard shutdown requested by user")
-    except Exception as e:
-        logger.error(f"Error starting dashboard: {e}")
-        raise 
\ No newline at end of file
+    def _get_training_context_data(self, symbol, trade_time, lookback_minutes=60):
+        """Get historical market data for training context"""
+        try:
+            # Try to get data from our tick cache first
+            if self.one_second_bars:
+                # Convert deque to DataFrame
+                bars_data = []
+                for bar in self.one_second_bars:
+                    bars_data.append({
+                        'timestamp': bar['timestamp'],
+                        'open': bar['open'],
+                        'high': bar['high'],
+                        'low': bar['low'],
+                        'close': bar['close'],
+                        'volume': bar['volume']
+                    })
+                
+                if bars_data:
+                    df = pd.DataFrame(bars_data)
+                    df['timestamp'] = pd.to_datetime(df['timestamp'])
+                    df.set_index('timestamp', inplace=True)
+                    
+                    # Filter to lookback period
+                    end_time = pd.to_datetime(trade_time)
+                    start_time = end_time - timedelta(minutes=lookback_minutes)
+                    
+                    context_data = df[(df.index >= start_time) & (df.index <= end_time)]
+                    
+                    if not context_data.empty:
+                        return context_data
+            
+            # Fallback to data provider
+            if self.data_provider:
+                # Get 1-minute data for the lookback period
+                context_data = self.data_provider.get_historical_data(
+                    symbol=symbol,
+                    timeframe='1m',
+                    limit=lookback_minutes,
+                    refresh=True
+                )
+                return context_data
+            
+            return None
+            
+        except Exception as e:
+            logger.warning(f"Error getting training context data: {e}")
+            return None
+    
+    def _prepare_rl_state(self, market_data, training_episode):
+        """Prepare state representation for RL training"""
+        try:
+            # Calculate technical indicators
+            df = market_data.copy()
+            
+            # Price features
+            df['returns'] = df['close'].pct_change()
+            df['price_ma_5'] = df['close'].rolling(5).mean()
+            df['price_ma_20'] = df['close'].rolling(20).mean()
+            
+            # Volatility
+            df['volatility'] = df['returns'].rolling(10).std()
+            
+            # RSI
+            df['rsi'] = self._calculate_rsi(df['close'])
+            
+            # Volume features
+            df['volume_ma'] = df['volume'].rolling(10).mean()
+            df['volume_ratio'] = df['volume'] / df['volume_ma']
+            
+            # Drop NaN values
+            df = df.dropna()
+            
+            if df.empty:
+                return None
+            
+            # Take the last row as the state (most recent before trade)
+            state_features = [
+                df['returns'].iloc[-1],
+                df['price_ma_5'].iloc[-1] / df['close'].iloc[-1] - 1,  # Normalized MA ratio
+                df['price_ma_20'].iloc[-1] / df['close'].iloc[-1] - 1,
+                df['volatility'].iloc[-1],
+                df['rsi'].iloc[-1] / 100.0,  # Normalize RSI to 0-1
+                df['volume_ratio'].iloc[-1]
+            ]
+            
+            # Add trade-specific features
+            entry_price = training_episode['entry_price']
+            current_price = df['close'].iloc[-1]
+            
+            state_features.extend([
+                (current_price - entry_price) / entry_price,  # Price change since entry
+                training_episode['duration_seconds'] / 3600.0,  # Duration in hours
+            ])
+            
+            return np.array(state_features, dtype=np.float32)
+            
+        except Exception as e:
+            logger.warning(f"Error preparing RL state: {e}")
+            return None
+    
+    def _send_rl_training_step(self, state, action, reward, training_episode):
+        """Send training step to RL models"""
+        try:
+            # Check if we have RL models loaded
+            if not hasattr(self, 'model_registry') or not self.model_registry:
+                logger.debug("[RL_TRAINING] No model registry available")
+                return False
+            
+            # Prepare training data package
+            training_data = {
+                'state': state.tolist() if state is not None else [],
+                'action': action,
+                'reward': reward,
+                'trade_info': {
+                    'trade_id': training_episode['trade_id'],
+                    'side': training_episode['side'],
+                    'pnl': training_episode['net_pnl'],
+                    'duration': training_episode['duration_seconds']
+                },
+                'timestamp': training_episode['timestamp'].isoformat()
+            }
+            
+            # Try to send to RL training process
+            success = self._send_to_rl_training_process(training_data)
+            
+            if success:
+                logger.debug(f"[RL_TRAINING] Sent training step for trade #{training_episode['trade_id']}")
+                return True
+            else:
+                logger.debug(f"[RL_TRAINING] Failed to send training step for trade #{training_episode['trade_id']}")
+                return False
+                
+        except Exception as e:
+            logger.error(f"Error starting dashboard: {e}")
+            raise 
+    
+    def _send_to_rl_training_process(self, training_data):
+        """Send training data to RL training process"""
+        try:
+            # For now, just log the training data
+            # In a full implementation, this would send to a separate RL training process
+            logger.info(f"[RL_TRAINING] Training data: Action={training_data['action']}, "
+                       f"Reward={training_data['reward']:.3f}, "
+                       f"State_size={len(training_data['state'])}")
+            
+            # Simulate training success
+            return True
+            
+        except Exception as e:
+            logger.warning(f"Error in RL training process communication: {e}")
+            return False
+    
+    def _estimate_model_accuracy(self):
+        """Estimate current model accuracy based on recent trades"""
+        try:
+            if len(self.closed_trades) < 5:
+                return 0.5  # Default accuracy
+            
+            # Look at last 20 trades
+            recent_trades = self.closed_trades[-20:]
+            profitable_trades = sum(1 for trade in recent_trades if trade.get('net_pnl', 0) > 0)
+            
+            accuracy = profitable_trades / len(recent_trades)
+            return accuracy
+            
+        except Exception as e:
+            logger.warning(f"Error estimating model accuracy: {e}")
+            return 0.5
+    
+    def get_rl_training_stats(self):
+        """Get current RL training statistics"""
+        return self.rl_training_stats.copy()
+
+
+def create_dashboard(data_provider: DataProvider = None, orchestrator: TradingOrchestrator = None, trading_executor: TradingExecutor = None) -> TradingDashboard:
+    """Factory function to create a trading dashboard"""
+    return TradingDashboard(data_provider=data_provider, orchestrator=orchestrator, trading_executor=trading_executor)
\ No newline at end of file