Merge commit 'd49a473ed6f4aef55bfdd47d6370e53582be6b7b' into cleanup

This commit is contained in:
Dobromir Popov
2025-10-01 00:32:19 +03:00
353 changed files with 81004 additions and 35899 deletions


@@ -27,8 +27,18 @@ import torch
import torch.nn as nn
import torch.optim as optim
<<<<<<< HEAD
# Import prediction tracking
from core.prediction_database import get_prediction_db
=======
# Import checkpoint management
try:
from utils.checkpoint_manager import get_checkpoint_manager, save_checkpoint
CHECKPOINT_MANAGER_AVAILABLE = True
except ImportError:
CHECKPOINT_MANAGER_AVAILABLE = False
logger.warning("Checkpoint manager not available. Model persistence will be disabled.")
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
logger = logging.getLogger(__name__)
@@ -61,12 +71,19 @@ class EnhancedRealtimeTrainingSystem:
# Experience buffers
self.experience_buffer = deque(maxlen=self.training_config['memory_size'])
self.validation_buffer = deque(maxlen=1000)
# Training counters - CRITICAL for checkpoint management
self.training_iteration = 0
self.dqn_training_count = 0
self.cnn_training_count = 0
self.cob_training_count = 0
self.priority_buffer = deque(maxlen=2000) # High-priority experiences
# Performance tracking
self.performance_history = {
'dqn_losses': deque(maxlen=1000),
'cnn_losses': deque(maxlen=1000),
'cob_rl_losses': deque(maxlen=1000), # Added COB RL loss tracking
'prediction_accuracy': deque(maxlen=500),
'trading_performance': deque(maxlen=200),
'validation_scores': deque(maxlen=100)
@@ -764,18 +781,33 @@ class EnhancedRealtimeTrainingSystem:
# Statistical features across time for each aggregated dimension
for feature_idx in range(agg_matrix.shape[1]):
feature_series = agg_matrix[:, feature_idx]
combined_features.extend([
np.mean(feature_series),
np.std(feature_series),
np.min(feature_series),
np.max(feature_series),
feature_series[-1] - feature_series[0] if len(feature_series) > 1 else 0, # Total change
np.mean(np.diff(feature_series)) if len(feature_series) > 1 else 0, # Average momentum
np.std(np.diff(feature_series)) if len(feature_series) > 2 else 0, # Momentum volatility
np.percentile(feature_series, 25), # 25th percentile
np.percentile(feature_series, 75), # 75th percentile
len([x for x in np.diff(feature_series) if x > 0]) / max(len(feature_series) - 1, 1) if len(feature_series) > 1 else 0.5 # Positive change ratio
])
# Clean feature series to prevent division warnings
feature_series_clean = feature_series[np.isfinite(feature_series)]
if len(feature_series_clean) > 0:
# Safe percentile calculation
try:
percentile_25 = np.percentile(feature_series_clean, 25)
percentile_75 = np.percentile(feature_series_clean, 75)
except (ValueError, RuntimeWarning):
percentile_25 = np.median(feature_series_clean) if len(feature_series_clean) > 0 else 0
percentile_75 = np.median(feature_series_clean) if len(feature_series_clean) > 0 else 0
combined_features.extend([
np.mean(feature_series_clean),
np.std(feature_series_clean),
np.min(feature_series_clean),
np.max(feature_series_clean),
feature_series_clean[-1] - feature_series_clean[0] if len(feature_series_clean) > 1 else 0, # Total change
np.mean(np.diff(feature_series_clean)) if len(feature_series_clean) > 1 else 0, # Average momentum
np.std(np.diff(feature_series_clean)) if len(feature_series_clean) > 2 else 0, # Momentum volatility
percentile_25, # 25th percentile
percentile_75, # 75th percentile
len([x for x in np.diff(feature_series_clean) if x > 0]) / max(len(feature_series_clean) - 1, 1) if len(feature_series_clean) > 1 else 0.5 # Positive change ratio
])
else:
# All values are NaN or inf, use zeros
combined_features.extend([0.0] * 10)
else:
combined_features.extend([0.0] * (15 * 10)) # 15 features * 10 statistics
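For reference, the per-feature statistics computed above can be sketched as a standalone helper (not part of the diff; the (time, features) matrix shape and the ten-statistic layout are taken from the hunk, everything else is illustrative):

import numpy as np

def summarize_feature(series: np.ndarray) -> list:
    """Ten summary statistics for one feature column, ignoring NaN/inf values."""
    s = series[np.isfinite(series)]
    if len(s) == 0:
        return [0.0] * 10
    diffs = np.diff(s) if len(s) > 1 else np.array([0.0])
    return [
        float(np.mean(s)), float(np.std(s)), float(np.min(s)), float(np.max(s)),
        float(s[-1] - s[0]) if len(s) > 1 else 0.0,                    # total change
        float(np.mean(diffs)) if len(s) > 1 else 0.0,                  # average momentum
        float(np.std(diffs)) if len(s) > 2 else 0.0,                   # momentum volatility
        float(np.percentile(s, 25)), float(np.percentile(s, 75)),
        float((diffs > 0).sum() / max(len(s) - 1, 1)) if len(s) > 1 else 0.5,  # positive change ratio
    ]

agg_matrix = np.random.rand(30, 15)          # 30 time steps x 15 aggregated features
features = [v for col in range(agg_matrix.shape[1])
            for v in summarize_feature(agg_matrix[:, col])]
assert len(features) == 15 * 10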
@@ -913,13 +945,14 @@ class EnhancedRealtimeTrainingSystem:
lows = np.array([bar['low'] for bar in self.real_time_data['ohlcv_1m']])
# Update indicators
price_mean = np.mean(prices[-20:])
self.technical_indicators = {
'sma_10': np.mean(prices[-10:]),
'sma_20': np.mean(prices[-20:]),
'rsi': self._calculate_rsi(prices, 14),
'volatility': np.std(prices[-20:]) / np.mean(prices[-20:]),
'volatility': np.std(prices[-20:]) / price_mean if price_mean > 0 else 0,
'volume_sma': np.mean(volumes[-10:]),
'price_momentum': (prices[-1] - prices[-5]) / prices[-5] if len(prices) >= 5 else 0,
'price_momentum': (prices[-1] - prices[-5]) / prices[-5] if len(prices) >= 5 and prices[-5] > 0 else 0,
'atr': np.mean(highs[-14:] - lows[-14:]) if len(prices) >= 14 else 0
}
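A quick worked example of the guarded ratios introduced above, on synthetic prices (illustrative values only, assuming NumPy):

import numpy as np

prices = np.array([100.0, 101.5, 99.8, 102.2, 103.0])
price_mean = np.mean(prices[-20:])                                              # mean over the available window
volatility = np.std(prices[-20:]) / price_mean if price_mean > 0 else 0         # ~0.012
momentum = (prices[-1] - prices[-5]) / prices[-5] if len(prices) >= 5 and prices[-5] > 0 else 0  # 0.03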
@@ -935,8 +968,8 @@ class EnhancedRealtimeTrainingSystem:
current_time = time.time()
current_bar = self.real_time_data['ohlcv_1m'][-1]
# Create comprehensive state features
state_features = self._build_comprehensive_state()
# Create comprehensive state features with default dimensions
state_features = self._build_comprehensive_state(100) # Use default 100 for general experiences
# Create experience with proper reward calculation
experience = {
@@ -959,8 +992,8 @@ class EnhancedRealtimeTrainingSystem:
except Exception as e:
logger.debug(f"Error creating training experiences: {e}")
def _build_comprehensive_state(self) -> np.ndarray:
"""Build comprehensive state vector for RL training"""
def _build_comprehensive_state(self, target_dimensions: int = 100) -> np.ndarray:
"""Build comprehensive state vector for RL training with adaptive dimensions"""
try:
state_features = []
@@ -1003,15 +1036,138 @@ class EnhancedRealtimeTrainingSystem:
state_features.append(np.cos(2 * np.pi * now.hour / 24))
state_features.append(now.weekday() / 6.0) # Day of week
# Pad to fixed size (100 features)
while len(state_features) < 100:
# Current count: 10 (prices) + 7 (indicators) + 1 (volume) + 5 (COB) + 3 (time) = 26 base features
# 6. Enhanced features for larger dimensions
if target_dimensions > 50:
# Add more price history
if len(self.real_time_data['ohlcv_1m']) >= 20:
extended_prices = [bar['close'] for bar in list(self.real_time_data['ohlcv_1m'])[-20:]]
base_price = extended_prices[0]
extended_normalized = [(p - base_price) / base_price for p in extended_prices[10:]] # Additional 10
state_features.extend(extended_normalized)
else:
state_features.extend([0.0] * 10)
# Add volume history
if len(self.real_time_data['ohlcv_1m']) >= 10:
volume_history = [bar['volume'] for bar in list(self.real_time_data['ohlcv_1m'])[-10:]]
avg_vol = np.mean(volume_history) if volume_history else 1.0
# Prevent division by zero
if avg_vol == 0:
avg_vol = 1.0
normalized_volumes = [v / avg_vol for v in volume_history]
state_features.extend(normalized_volumes)
else:
state_features.extend([0.0] * 10)
# Add extended COB features
extended_cob = self._extract_cob_features()
state_features.extend(extended_cob[5:]) # Remaining COB features
# Add 5m timeframe data if available
if len(self.real_time_data['ohlcv_5m']) >= 5:
tf_5m_prices = [bar['close'] for bar in list(self.real_time_data['ohlcv_5m'])[-5:]]
if tf_5m_prices:
base_5m = tf_5m_prices[0]
# Prevent division by zero
if base_5m == 0:
base_5m = 1.0
normalized_5m = [(p - base_5m) / base_5m for p in tf_5m_prices]
state_features.extend(normalized_5m)
else:
state_features.extend([0.0] * 5)
else:
state_features.extend([0.0] * 5)
# 7. Adaptive padding/truncation based on target dimensions
current_length = len(state_features)
if target_dimensions > current_length:
# Pad with additional engineered features
remaining = target_dimensions - current_length
# Add statistical features if we have data
if len(self.real_time_data['ohlcv_1m']) >= 20:
all_prices = [bar['close'] for bar in list(self.real_time_data['ohlcv_1m'])[-20:]]
all_volumes = [bar['volume'] for bar in list(self.real_time_data['ohlcv_1m'])[-20:]]
# Statistical features
additional_features = [
np.std(all_prices) / np.mean(all_prices) if np.mean(all_prices) > 0 else 0, # Price CV
np.std(all_volumes) / np.mean(all_volumes) if np.mean(all_volumes) > 0 else 0, # Volume CV
(max(all_prices) - min(all_prices)) / np.mean(all_prices) if np.mean(all_prices) > 0 else 0, # Price range
# Safe correlation calculation
np.corrcoef(all_prices, all_volumes)[0, 1] if (len(all_prices) == len(all_volumes) and len(all_prices) > 1 and
np.std(all_prices) > 0 and np.std(all_volumes) > 0) else 0, # Price-volume correlation
]
# Add momentum features
for window in [3, 5, 10]:
if len(all_prices) >= window:
momentum = (all_prices[-1] - all_prices[-window]) / all_prices[-window] if all_prices[-window] > 0 else 0
additional_features.append(momentum)
else:
additional_features.append(0.0)
# Extend to fill remaining space
while len(additional_features) < remaining and len(additional_features) < 50:
additional_features.extend([
np.sin(len(additional_features) * 0.1), # Sine waves for variety
np.cos(len(additional_features) * 0.1),
np.tanh(len(additional_features) * 0.01)
])
state_features.extend(additional_features[:remaining])
else:
# Fill with structured zeros/patterns if no data
pattern_features = []
for i in range(remaining):
pattern_features.append(np.sin(i * 0.01)) # Small oscillating pattern
state_features.extend(pattern_features)
# Ensure exact target dimension
state_features = state_features[:target_dimensions]
while len(state_features) < target_dimensions:
state_features.append(0.0)
return np.array(state_features[:100])
return np.array(state_features)
except Exception as e:
logger.error(f"Error building state: {e}")
return np.zeros(100)
return np.zeros(target_dimensions)
def _get_model_expected_dimensions(self, model_type: str) -> int:
"""Get expected input dimensions for different model types"""
try:
if model_type == 'dqn':
# Try to get DQN expected dimensions from model
if (self.orchestrator and hasattr(self.orchestrator, 'rl_agent')
and self.orchestrator.rl_agent and hasattr(self.orchestrator.rl_agent, 'policy_net')):
# Get first layer input size
first_layer = list(self.orchestrator.rl_agent.policy_net.children())[0]
if hasattr(first_layer, 'in_features'):
return first_layer.in_features
return 403 # Default for DQN based on error logs
elif model_type == 'cnn':
# CNN might have different input expectations
if (self.orchestrator and hasattr(self.orchestrator, 'cnn_model')
and self.orchestrator.cnn_model):
# Try to get CNN input size
if hasattr(self.orchestrator.cnn_model, 'input_shape'):
return self.orchestrator.cnn_model.input_shape
return 300 # Default for CNN based on error logs
elif model_type == 'cob_rl':
return 2000 # COB RL expects 2000 features
else:
return 100 # Default
except Exception as e:
logger.debug(f"Error getting model dimensions for {model_type}: {e}")
return 100 # Fallback
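How the two new helpers are meant to compose when building model input (a hypothetical call site; `training_system` is an `EnhancedRealtimeTrainingSystem` instance, and 403 only applies when introspecting the DQN policy net fails):

target_dims = training_system._get_model_expected_dimensions('dqn')   # e.g. first layer in_features, else 403
state = training_system._build_comprehensive_state(target_dims)       # padded or truncated to exactly target_dims
assert state.shape == (target_dims,)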
def _extract_cob_features(self) -> List[float]:
"""Extract features from COB data"""
@@ -1131,8 +1287,8 @@ class EnhancedRealtimeTrainingSystem:
total_loss += loss
training_iterations += 1
elif hasattr(rl_agent, 'replay'):
# Fallback to replay method
loss = rl_agent.replay(batch_size=len(batch))
# Fallback to replay method - DQNAgent.replay() doesn't accept batch_size parameter
loss = rl_agent.replay()
if loss is not None:
total_loss += loss
training_iterations += 1
@@ -1142,6 +1298,10 @@ class EnhancedRealtimeTrainingSystem:
self.dqn_training_count += 1
# Save checkpoint after training
if training_iterations > 0 and avg_loss > 0:
self._save_model_checkpoint('dqn_agent', rl_agent, avg_loss)
# Log progress every 10 training sessions
if self.dqn_training_count % 10 == 0:
logger.info(f"DQN TRAINING: Session {self.dqn_training_count}, "
@@ -1175,6 +1335,18 @@ class EnhancedRealtimeTrainingSystem:
aggregated_matrix = self.get_cob_training_matrix(symbol, '1s_aggregated')
if combined_features is not None:
# Ensure features are exactly 2000 dimensions
if len(combined_features) != 2000:
logger.warning(f"COB features wrong size: {len(combined_features)}, padding/truncating to 2000")
if len(combined_features) < 2000:
# Pad with zeros
padded_features = np.zeros(2000, dtype=np.float32)
padded_features[:len(combined_features)] = combined_features
combined_features = padded_features
else:
# Truncate to 2000
combined_features = combined_features[:2000]
# Create enhanced COB training experience
current_price = self._get_current_price_from_data(symbol)
if current_price:
@@ -1184,29 +1356,14 @@ class EnhancedRealtimeTrainingSystem:
# Calculate reward based on COB prediction accuracy
reward = self._calculate_cob_reward(symbol, action, combined_features)
# Create comprehensive state vector for COB RL
# Create comprehensive state vector for COB RL (exactly 2000 dimensions)
state = combined_features # 2000-dimensional state
# Store experience in COB RL agent
if hasattr(cob_rl_agent, 'store_experience'):
experience = {
'state': state,
'action': action,
'reward': reward,
'next_state': state, # Will be updated with next observation
'done': False,
'symbol': symbol,
'timestamp': datetime.now(),
'price': current_price,
'cob_features': {
'raw_tick_available': raw_tick_matrix is not None,
'aggregated_available': aggregated_matrix is not None,
'imbalance': combined_features[0] if len(combined_features) > 0 else 0,
'spread': combined_features[1] if len(combined_features) > 1 else 0,
'liquidity': combined_features[4] if len(combined_features) > 4 else 0
}
}
cob_rl_agent.store_experience(experience)
if hasattr(cob_rl_agent, 'remember'):
# Use tuple format for DQN agent compatibility
experience_tuple = (state, action, reward, state, False) # next_state = current state for now
cob_rl_agent.remember(state, action, reward, state, False)
training_updates += 1
# Perform COB RL training if enough experiences
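The exact-2000-dimension guard above is a generic pad-or-truncate step; a minimal standalone sketch (assumed helper name, not in the diff):

import numpy as np

def fit_to_dim(features: np.ndarray, target: int = 2000) -> np.ndarray:
    """Pad with zeros or truncate so the vector is exactly `target` long."""
    if len(features) == target:
        return features
    out = np.zeros(target, dtype=np.float32)
    out[:min(len(features), target)] = features[:target]
    return out

assert fit_to_dim(np.ones(1500, dtype=np.float32)).shape == (2000,)
assert fit_to_dim(np.ones(2500, dtype=np.float32)).shape == (2000,)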
@@ -1479,16 +1636,29 @@ class EnhancedRealtimeTrainingSystem:
# Moving averages
if len(prev_prices) >= 5:
ma5 = sum(prev_prices[-5:]) / 5
tech_features.append((current_price - ma5) / ma5)
# Prevent division by zero
if ma5 != 0:
tech_features.append((current_price - ma5) / ma5)
else:
tech_features.append(0.0)
if len(prev_prices) >= 10:
ma10 = sum(prev_prices[-10:]) / 10
tech_features.append((current_price - ma10) / ma10)
# Prevent division by zero
if ma10 != 0:
tech_features.append((current_price - ma10) / ma10)
else:
tech_features.append(0.0)
# Volatility measure
if len(prev_prices) >= 5:
volatility = np.std(prev_prices[-5:]) / np.mean(prev_prices[-5:])
tech_features.append(volatility)
price_mean = np.mean(prev_prices[-5:])
# Prevent division by zero
if price_mean != 0:
volatility = np.std(prev_prices[-5:]) / price_mean
tech_features.append(volatility)
else:
tech_features.append(0.0)
# Pad technical features to 200
while len(tech_features) < 200:
@@ -1670,6 +1840,14 @@ class EnhancedRealtimeTrainingSystem:
features_tensor = torch.from_numpy(features).float().to(device)
targets_tensor = torch.from_numpy(targets).long().to(device)
# FIXED: Move tensors to same device as model
device = next(model.parameters()).device
features_tensor = features_tensor.to(device)
targets_tensor = targets_tensor.to(device)
# Move criterion to same device as well
criterion = criterion.to(device)
# Ensure features_tensor has the correct shape for CNN (batch_size, channels, height, width)
# Assuming features are flattened (batch_size, 15*20) and need to be reshaped to (batch_size, 1, 15, 20)
# This depends on the actual CNN model architecture. Assuming a simple CNN that expects (batch, channels, height, width)
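The device fix above follows a standard pattern: resolve the device from the model's own parameters, then move the input tensors and the criterion onto it. A minimal sketch (the `nn.Linear` stand-in and shapes are illustrative, not the actual CNN):

import numpy as np
import torch
import torch.nn as nn

model = nn.Linear(300, 3)                        # stand-in for the CNN
criterion = nn.CrossEntropyLoss()

features = np.random.rand(8, 300).astype(np.float32)
targets = np.random.randint(0, 3, size=8)

device = next(model.parameters()).device         # wherever the model actually lives
features_tensor = torch.from_numpy(features).float().to(device)
targets_tensor = torch.from_numpy(targets).long().to(device)
criterion = criterion.to(device)                 # keeps any criterion buffers on the same device

loss = criterion(model(features_tensor), targets_tensor)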
@@ -1700,6 +1878,7 @@ class EnhancedRealtimeTrainingSystem:
outputs = model(features_tensor)
<<<<<<< HEAD
# Extract logits from model output (model returns a dictionary)
if isinstance(outputs, dict):
logits = outputs['logits']
@@ -1713,6 +1892,19 @@ class EnhancedRealtimeTrainingSystem:
logger.error(f"CNN output is not a tensor: {type(logits)}")
return 0.0
=======
# FIXED: Handle case where model returns tuple (extract the logits)
if isinstance(outputs, tuple):
# Assume the first element is the main output (logits)
logits = outputs[0]
elif isinstance(outputs, dict):
# Handle dictionary output (get main prediction)
logits = outputs.get('logits', outputs.get('predictions', outputs.get('output', list(outputs.values())[0])))
else:
# Single tensor output
logits = outputs
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
loss = criterion(logits, targets_tensor)
loss.backward()
@@ -1721,8 +1913,122 @@ class EnhancedRealtimeTrainingSystem:
return loss.item()
except Exception as e:
logger.error(f"Error in CNN training: {e}")
logger.error(f"RT TRAINING: Error in CNN training: {e}")
return 1.0 # Return default loss value in case of error
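The merged output handling above amounts to normalizing tuple, dict, or plain-tensor model outputs before computing the loss; a hedged helper sketch (the name `extract_logits` is assumed, not in the diff):

import torch

def extract_logits(outputs):
    """Pull the logits tensor out of a tuple, dict, or plain tensor output."""
    if isinstance(outputs, tuple):
        return outputs[0]                                    # first element assumed to be the main head
    if isinstance(outputs, dict):
        for key in ('logits', 'predictions', 'output'):
            if key in outputs:
                return outputs[key]
        return next(iter(outputs.values()))                  # fall back to the first value
    return outputs

assert torch.equal(extract_logits((torch.ones(2, 3), None)), torch.ones(2, 3))
assert torch.equal(extract_logits({'logits': torch.zeros(2, 3)}), torch.zeros(2, 3))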
def _sample_prioritized_experiences(self) -> List[Dict]:
"""Sample prioritized experiences for training"""
try:
experiences = []
# Sample from priority buffer first (high-priority experiences)
if self.priority_buffer:
priority_samples = min(len(self.priority_buffer), self.training_config['batch_size'] // 2)
priority_experiences = random.sample(list(self.priority_buffer), priority_samples)
experiences.extend(priority_experiences)
# Sample from regular experience buffer
if self.experience_buffer:
remaining_samples = self.training_config['batch_size'] - len(experiences)
if remaining_samples > 0:
regular_samples = min(len(self.experience_buffer), remaining_samples)
regular_experiences = random.sample(list(self.experience_buffer), regular_samples)
experiences.extend(regular_experiences)
# Convert experiences to DQN format
dqn_experiences = []
for exp in experiences:
# Create next state by shifting current state (simple approximation)
next_state = exp['state'].copy() if hasattr(exp['state'], 'copy') else exp['state']
# Simple reward based on recent market movement
reward = self._calculate_experience_reward(exp)
# Action mapping: 0=BUY, 1=SELL, 2=HOLD
action = self._determine_action_from_experience(exp)
dqn_exp = {
'state': exp['state'],
'action': action,
'reward': reward,
'next_state': next_state,
'done': False # Episodes don't really "end" in continuous trading
}
dqn_experiences.append(dqn_exp)
return dqn_experiences
except Exception as e:
logger.error(f"Error sampling prioritized experiences: {e}")
return []
def _calculate_experience_reward(self, experience: Dict) -> float:
"""Calculate reward for an experience"""
try:
# Simple reward based on technical indicators and market events
reward = 0.0
# Reward based on market events
if experience.get('market_events', 0) > 0:
reward += 0.1 # Bonus for learning from market events
# Reward based on technical indicators
tech_indicators = experience.get('technical_indicators', {})
if tech_indicators:
# Reward for strong momentum
momentum = tech_indicators.get('price_momentum', 0)
reward += np.tanh(momentum * 10) # Bounded reward
# Penalize high volatility
volatility = tech_indicators.get('volatility', 0)
reward -= min(volatility * 5, 0.2) # Penalty for high volatility
# Reward based on COB features
cob_features = experience.get('cob_features', [])
if cob_features and len(cob_features) > 0:
# Reward for strong order book imbalance
imbalance = cob_features[0] if len(cob_features) > 0 else 0
reward += abs(imbalance) * 0.1 # Reward for any imbalance signal
return max(-1.0, min(1.0, reward)) # Clamp to [-1, 1]
except Exception as e:
logger.debug(f"Error calculating experience reward: {e}")
return 0.0
def _determine_action_from_experience(self, experience: Dict) -> int:
"""Determine action from experience data"""
try:
# Use technical indicators to determine action
tech_indicators = experience.get('technical_indicators', {})
if tech_indicators:
momentum = tech_indicators.get('price_momentum', 0)
rsi = tech_indicators.get('rsi', 50)
# Simple logic based on momentum and RSI
if momentum > 0.005 and rsi < 70: # Upward momentum, not overbought
return 0 # BUY
elif momentum < -0.005 and rsi > 30: # Downward momentum, not oversold
return 1 # SELL
else:
return 2 # HOLD
# Fallback to COB-based action
cob_features = experience.get('cob_features', [])
if cob_features and len(cob_features) > 0:
imbalance = cob_features[0]
if imbalance > 0.1:
return 0 # BUY (bid imbalance)
elif imbalance < -0.1:
return 1 # SELL (ask imbalance)
return 2 # Default to HOLD
except Exception as e:
logger.debug(f"Error determining action from experience: {e}")
return 2 # Default to HOLD
def _perform_validation(self):
"""Perform validation to track model performance"""
@@ -2084,17 +2390,21 @@ class EnhancedRealtimeTrainingSystem:
def _generate_forward_dqn_prediction(self, symbol: str, current_time: float):
"""Generate a DQN prediction for future price movement"""
try:
# Get current market state (only historical data)
current_state = self._build_comprehensive_state()
# Get current market state with DQN-specific dimensions
target_dims = self._get_model_expected_dimensions('dqn')
current_state = self._build_comprehensive_state(target_dims)
current_price = self._get_current_price_from_data(symbol)
if current_price is None:
# SKIP prediction if price is invalid
if current_price is None or current_price <= 0:
logger.debug(f"Skipping DQN prediction for {symbol}: invalid price {current_price}")
return
# Use DQN model to predict action (if available)
if (self.orchestrator and hasattr(self.orchestrator, 'rl_agent')
and self.orchestrator.rl_agent):
<<<<<<< HEAD
# Use RL agent to make prediction
current_state = self._get_dqn_state_features(symbol)
if current_state is None:
@@ -2112,6 +2422,28 @@ class EnhancedRealtimeTrainingSystem:
confidence = max(q_values) / sum(q_values) if sum(q_values) > 0 else 0.33
=======
# Get action from DQN agent
action = self.orchestrator.rl_agent.act(current_state, explore=False)
# Get Q-values by manually calling the model
q_values = self._get_dqn_q_values(current_state)
# Calculate confidence from Q-values
if q_values is not None and len(q_values) > 0:
# Convert to probabilities and get confidence
probs = torch.softmax(torch.tensor(q_values), dim=0).numpy()
confidence = float(max(probs))
q_values = q_values.tolist() if hasattr(q_values, 'tolist') else list(q_values)
else:
confidence = 0.33
q_values = [0.33, 0.33, 0.34] # Default uniform distribution
# Handle case where action is None (HOLD)
if action is None:
action = 2 # Map None to HOLD action
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
else:
# Fallback to technical analysis-based prediction
action, q_values, confidence = self._technical_analysis_prediction(symbol)
@@ -2138,8 +2470,8 @@ class EnhancedRealtimeTrainingSystem:
if symbol in self.pending_predictions:
self.pending_predictions[symbol].append(prediction)
# Add to recent predictions for display (only if confident enough)
if confidence > 0.4:
# Add to recent predictions for display (only if confident enough AND valid price)
if confidence > 0.4 and current_price > 0:
display_prediction = {
'timestamp': prediction_time,
'price': current_price,
@@ -2152,6 +2484,7 @@ class EnhancedRealtimeTrainingSystem:
self.last_prediction_time[symbol] = int(current_time)
<<<<<<< HEAD
# Robust action labeling
if action is None:
action_label = 'HOLD'
@@ -2163,10 +2496,46 @@ class EnhancedRealtimeTrainingSystem:
action_label = 'UNKNOWN'
logger.info(f"Forward DQN prediction: {symbol} action={action_label} confidence={confidence:.2f} target={target_time.strftime('%H:%M:%S')}")
=======
logger.info(f"Forward DQN prediction: {symbol} action={['BUY','SELL','HOLD'][action]} confidence={confidence:.2f} price=${current_price:.2f} target={target_time.strftime('%H:%M:%S')} dims={len(current_state)}")
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
except Exception as e:
logger.error(f"Error generating forward DQN prediction: {e}")
def _get_dqn_q_values(self, state: np.ndarray) -> Optional[np.ndarray]:
"""Get Q-values from DQN agent without performing action selection"""
try:
if not self.orchestrator or not hasattr(self.orchestrator, 'rl_agent') or not self.orchestrator.rl_agent:
return None
rl_agent = self.orchestrator.rl_agent
# Convert state to tensor
if isinstance(state, np.ndarray):
state_tensor = torch.FloatTensor(state).unsqueeze(0).to(rl_agent.device)
else:
state_tensor = state.unsqueeze(0).to(rl_agent.device)
# Get Q-values directly from policy network
with torch.no_grad():
policy_output = rl_agent.policy_net(state_tensor)
# Handle different output formats
if isinstance(policy_output, dict):
q_values = policy_output.get('q_values', policy_output.get('Q_values', list(policy_output.values())[0]))
elif isinstance(policy_output, tuple):
q_values = policy_output[0] # Assume first element is Q-values
else:
q_values = policy_output
# Convert to numpy
return q_values.cpu().data.numpy()[0]
except Exception as e:
logger.debug(f"Error getting DQN Q-values: {e}")
return None
def _generate_forward_cnn_prediction(self, symbol: str, current_time: float):
"""Generate a CNN prediction for future price direction"""
try:
@@ -2174,9 +2543,15 @@ class EnhancedRealtimeTrainingSystem:
current_price = self._get_current_price_from_data(symbol)
price_sequence = self._get_historical_price_sequence(symbol, periods=15)
if current_price is None or len(price_sequence) < 15:
# SKIP prediction if price is invalid
if current_price is None or current_price <= 0:
logger.debug(f"Skipping CNN prediction for {symbol}: invalid price {current_price}")
return
if len(price_sequence) < 15:
logger.debug(f"Skipping CNN prediction for {symbol}: insufficient data")
return
# Use CNN model to predict direction (if available)
if (self.orchestrator and hasattr(self.orchestrator, 'cnn_model')
and self.orchestrator.cnn_model):
@@ -2229,8 +2604,8 @@ class EnhancedRealtimeTrainingSystem:
if symbol in self.pending_predictions:
self.pending_predictions[symbol].append(prediction)
# Add to recent predictions for display (only if confident enough)
if confidence > 0.5:
# Add to recent predictions for display (only if confident enough AND valid prices)
if confidence > 0.5 and current_price > 0 and predicted_price > 0:
display_prediction = {
'timestamp': prediction_time,
'current_price': current_price,
@@ -2241,7 +2616,7 @@ class EnhancedRealtimeTrainingSystem:
if symbol in self.recent_cnn_predictions:
self.recent_cnn_predictions[symbol].append(display_prediction)
logger.info(f"Forward CNN prediction: {symbol} direction={['DOWN','SAME','UP'][direction]} confidence={confidence:.2f} target={target_time.strftime('%H:%M:%S')}")
logger.info(f"Forward CNN prediction: {symbol} direction={['DOWN','SAME','UP'][direction]} confidence={confidence:.2f} price=${current_price:.2f} -> ${predicted_price:.2f} target={target_time.strftime('%H:%M:%S')}")
except Exception as e:
logger.error(f"Error generating forward CNN prediction: {e}")
@@ -2332,8 +2707,24 @@ class EnhancedRealtimeTrainingSystem:
def _get_current_price_from_data(self, symbol: str) -> Optional[float]:
"""Get current price from real-time data streams"""
try:
# First, try to get from data provider (most reliable)
if self.data_provider:
price = self.data_provider.get_current_price(symbol)
if price and price > 0:
return price
# Fallback to internal buffer
if len(self.real_time_data['ohlcv_1m']) > 0:
return self.real_time_data['ohlcv_1m'][-1]['close']
price = self.real_time_data['ohlcv_1m'][-1]['close']
if price and price > 0:
return price
# Fallback to orchestrator price
if self.orchestrator:
price = self.orchestrator._get_current_price(symbol)
if price and price > 0:
return price
return None
except Exception as e:
logger.debug(f"Error getting current price: {e}")
@@ -2428,4 +2819,56 @@ class EnhancedRealtimeTrainingSystem:
except Exception as e:
logger.debug(f"Error estimating price change: {e}")
return 0.0
return 0.0
def _save_model_checkpoint(self, model_name: str, model_obj, loss: float):
"""
Save model checkpoint after training if performance improved
This is CRITICAL for preserving training progress across restarts.
"""
try:
if not CHECKPOINT_MANAGER_AVAILABLE:
return
# Get checkpoint manager
checkpoint_manager = get_checkpoint_manager()
if not checkpoint_manager:
return
# Prepare performance metrics
performance_metrics = {
'loss': loss,
'training_samples': len(self.experience_buffer),
'timestamp': datetime.now().isoformat()
}
# Prepare training metadata
training_metadata = {
'timestamp': datetime.now().isoformat(),
'training_iteration': self.training_iteration,
'model_type': model_name
}
# Determine model type based on model name
model_type = model_name
if 'dqn' in model_name.lower():
model_type = 'dqn'
elif 'cnn' in model_name.lower():
model_type = 'cnn'
elif 'cob' in model_name.lower():
model_type = 'cob_rl'
# Save checkpoint
checkpoint_path = save_checkpoint(
model=model_obj,
model_name=model_name,
model_type=model_type,
performance_metrics=performance_metrics,
training_metadata=training_metadata
)
if checkpoint_path:
logger.info(f"💾 Saved checkpoint for {model_name}: {checkpoint_path} (loss: {loss:.4f})")
except Exception as e:
logger.error(f"Error saving checkpoint for {model_name}: {e}")