misc

2025-05-13 17:19:52 +03:00
parent 7dda00b64a
commit c0872248ab
60 changed files with 42085 additions and 6885 deletions
--- a/NN/models/cnn_model_pytorch.py
+++ b/NN/models/cnn_model_pytorch.py
@@ -78,17 +78,25 @@ class CNNPyTorch(nn.Module):
        window_size, num_features = input_shape
        self.window_size = window_size
        
-        # Simpler architecture with fewer layers and dropout
+        # Wider and deeper architecture: 64 -> 128 -> 128 filters across three conv blocks
        self.conv1 = nn.Sequential(
-            nn.Conv1d(num_features, 32, kernel_size=3, padding=1),
-            nn.BatchNorm1d(32),
+            nn.Conv1d(num_features, 64, kernel_size=3, padding=1), # Increased filters
+            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
        
        self.conv2 = nn.Sequential(
-            nn.Conv1d(32, 64, kernel_size=3, padding=1),
-            nn.BatchNorm1d(64),
+            nn.Conv1d(64, 128, kernel_size=3, padding=1), # Increased filters
+            nn.BatchNorm1d(128),
+            nn.ReLU(),
+            nn.Dropout(0.2)
+        )
+
+        # Added third conv layer
+        self.conv3 = nn.Sequential(
+            nn.Conv1d(128, 128, kernel_size=3, padding=1), 
+            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
@@ -96,12 +104,12 @@ class CNNPyTorch(nn.Module):
        # Global average pooling to handle variable length sequences
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        
-        # Fully connected layers
+        # Fully connected layers (updated input size and hidden size)
        self.fc = nn.Sequential(
-            nn.Linear(64, 32),
+            nn.Linear(128, 64), # Updated input size from conv3, increased hidden size
            nn.ReLU(),
            nn.Dropout(0.2),
-            nn.Linear(32, output_size)
+            nn.Linear(64, output_size)
        )
    
    def forward(self, x):
@@ -120,10 +128,11 @@ class CNNPyTorch(nn.Module):
        # Convolutional layers
        x = self.conv1(x)
        x = self.conv2(x)
+        x = self.conv3(x) # Added conv3 pass
        
        # Global pooling
        x = self.global_pool(x)
-        x = x.squeeze(-1)
+        x = x.squeeze(-1) # Shape becomes [batch, 128]
        
        # Fully connected layers
        action_logits = self.fc(x)
@@ -216,6 +225,8 @@ class CNNModelPyTorch:
        self.last_actions = [[] for _ in range(num_pairs)]  # Track recent actions per pair
    
    def train_epoch(self, X_train, y_train, future_prices, batch_size):
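+        # Run predict_extrema on the training inputs (result discarded). NOTE(review): this precedes the string below, so it is no longer the docstring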
+        self.predict_extrema(X_train)
        """Train the model for one epoch with focus on short-term pattern recognition"""
        self.model.train()
        total_loss = 0
@@ -321,7 +332,8 @@ class CNNModelPyTorch:
        
        return avg_loss, 0, accuracy  # Return 0 for price_loss as we're not using it

-    def predict(self, X):
+    def predict_extrema(self, X):
+        # Predict local extrema (lows and highs) based on input data
        """Make predictions optimized for short-term high-leverage trading signals"""
        self.model.eval()
        
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -54,6 +54,7 @@ class DQNAgent:
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
+        self.epsilon_start = epsilon  # Store initial epsilon value for resets/bumps
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.target_update = target_update
@@ -127,6 +128,28 @@ class DQNAgent:
        self.best_reward = -float('inf')
        self.no_improvement_count = 0
        
+        # Confidence tracking
+        self.confidence_history = []
+        self.avg_confidence = 0.0
+        self.max_confidence = 0.0
+        self.min_confidence = 1.0
+        
+        # Trade action fee and confidence thresholds
+        self.trade_action_fee = 0.0005  # Small fee to discourage unnecessary trading
+        self.minimum_action_confidence = 0.5  # Minimum confidence to consider trading
+        self.recent_actions = []  # Track recent actions to avoid oscillations
+        
+        # Violent move detection
+        self.price_history = []
+        self.volatility_window = 20  # Window size for volatility calculation
+        self.volatility_threshold = 0.0015  # Threshold for considering a move "violent"
+        self.post_violent_move = False  # Flag for recent violent move
+        self.violent_move_cooldown = 0  # Cooldown after violent move
+        
+        # Feature integration
+        self.last_hidden_features = None  # Store last extracted features
+        self.feature_history = []  # Store history of features for analysis
+        
        # Check if mixed precision training should be used
        self.use_mixed_precision = False
        if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
@@ -146,6 +169,7 @@ class DQNAgent:
        self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0]]  # Default timeframes
        
        logger.info(f"DQN Agent using device: {self.device}")
+        logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
        
    def move_models_to_device(self, device=None):
        """Move models to the specified device (GPU/CPU)"""
@@ -189,8 +213,20 @@ class DQNAgent:
                current_price = state[-1]  # Last feature
                next_price = next_state[-1]
            
-            # Calculate price change
-            price_change = (next_price - current_price) / current_price
+            # Calculate price change - avoid division by zero
+            if np.isscalar(current_price) and current_price != 0:
+                price_change = (next_price - current_price) / current_price
+            elif isinstance(current_price, np.ndarray):
+                # Handle array case - protect against division by zero
+                with np.errstate(divide='ignore', invalid='ignore'):
+                    price_change = (next_price - current_price) / current_price
+                # Replace infinities and NaNs with zeros
+                if isinstance(price_change, np.ndarray):
+                    price_change = np.nan_to_num(price_change, nan=0.0, posinf=0.0, neginf=0.0)
+                else:
+                    price_change = 0.0 if np.isnan(price_change) or np.isinf(price_change) else price_change
+            else:
+                price_change = 0.0
            
            # Check if this is a significant price movement
            if abs(price_change) > 0.002:  # Significant price change
@@ -264,9 +300,17 @@ class DQNAgent:
            
            # Get predictions using the policy network
            self.policy_net.eval()  # Set to evaluation mode for inference
-            action_probs, extrema_pred, price_predictions = self.policy_net(state_tensor)
+            action_probs, extrema_pred, price_predictions, hidden_features = self.policy_net(state_tensor)
            self.policy_net.train()  # Back to training mode
            
+            # Store hidden features for integration
+            self.last_hidden_features = hidden_features.cpu().numpy()
+            
+            # Track feature history (limited size)
+            self.feature_history.append(hidden_features.cpu().numpy())
+            if len(self.feature_history) > 100:
+                self.feature_history = self.feature_history[-100:]
+            
            # Get the predicted extrema class (0=bottom, 1=top, 2=neither)
            extrema_class = extrema_pred.argmax(dim=1).item()
            extrema_confidence = torch.softmax(extrema_pred, dim=1)[0, extrema_class].item()
@@ -336,17 +380,120 @@ class DQNAgent:
            # Get the action with highest Q-value
            action = action_probs.argmax().item()
            
+            # Calculate overall confidence in the action
+            q_values_softmax = F.softmax(action_probs, dim=1)[0]
+            action_confidence = q_values_softmax[action].item()
+            
+            # Track confidence metrics
+            self.confidence_history.append(action_confidence)
+            if len(self.confidence_history) > 100:
+                self.confidence_history = self.confidence_history[-100:]
+            
+            # Update confidence metrics
+            self.avg_confidence = sum(self.confidence_history) / len(self.confidence_history)
+            self.max_confidence = max(self.max_confidence, action_confidence)
+            self.min_confidence = min(self.min_confidence, action_confidence)
+            
+            # Log average confidence occasionally
+            if random.random() < 0.01:  # 1% of the time
+                logger.info(f"Confidence metrics - Current: {action_confidence:.4f}, Avg: {self.avg_confidence:.4f}, " +
+                           f"Min: {self.min_confidence:.4f}, Max: {self.max_confidence:.4f}")
+            
+            # Track price for violent move detection
+            try:
+                # Extract current price from state (assuming it's in the last position)
+                if len(state.shape) > 1:  # For 2D state
+                    current_price = state[-1, -1]
+                else:  # For 1D state
+                    current_price = state[-1]
+                
+                self.price_history.append(current_price)
+                if len(self.price_history) > self.volatility_window:
+                    self.price_history = self.price_history[-self.volatility_window:]
+                
+                # Detect violent price moves if we have enough price history
+                if len(self.price_history) >= 5:
+                    # Calculate short-term volatility
+                    recent_prices = self.price_history[-5:]
+                    
+                    # Make sure we're working with scalar values, not arrays
+                    if isinstance(recent_prices[0], np.ndarray):
+                        # If prices are arrays, extract the last value (current price)
+                        recent_prices = [p[-1] if isinstance(p, np.ndarray) and p.size > 0 else p for p in recent_prices]
+                    
+                    # Calculate price changes with protection against division by zero
+                    price_changes = []
+                    for i in range(1, len(recent_prices)):
+                        if recent_prices[i-1] != 0 and not np.isnan(recent_prices[i-1]) and not np.isnan(recent_prices[i]):
+                            change = (recent_prices[i] - recent_prices[i-1]) / recent_prices[i-1]
+                            price_changes.append(change)
+                        else:
+                            price_changes.append(0.0)
+                    
+                    # Calculate volatility as sum of absolute price changes
+                    volatility = sum([abs(change) for change in price_changes])
+                    
+                    # Check if we've had a violent move
+                    if volatility > self.volatility_threshold:
+                        logger.info(f"Violent price move detected! Volatility: {volatility:.6f}")
+                        self.post_violent_move = True
+                        self.violent_move_cooldown = 10  # Set cooldown period
+                    
+                    # Handle post-violent move period
+                    if self.post_violent_move:
+                        if self.violent_move_cooldown > 0:
+                            self.violent_move_cooldown -= 1
+                            # Increase confidence threshold temporarily after violent moves
+                            effective_threshold = self.minimum_action_confidence * 1.1
+                            logger.info(f"Post-violent move period: {self.violent_move_cooldown} steps remaining. " +
+                                       f"Using higher confidence threshold: {effective_threshold:.4f}")
+                        else:
+                            self.post_violent_move = False
+                            logger.info("Post-violent move period ended")
+            except Exception as e:
+                logger.warning(f"Error in violent move detection: {str(e)}")
+            
+            # Apply trade action fee to buy/sell actions but not to hold
+            # This creates a threshold that must be exceeded to justify a trade
+            action_values = action_probs.clone()
+            
+            # If BUY or SELL, apply fee by reducing the Q-value
+            if action == 0 or action == 1:  # BUY or SELL
+                # Check if confidence is above minimum threshold
+                effective_threshold = self.minimum_action_confidence
+                if self.post_violent_move:
+                    effective_threshold *= 1.1  # Higher threshold after violent moves
+                
+                if action_confidence < effective_threshold:
+                    # If confidence is below threshold, force HOLD action
+                    logger.info(f"Action {action} confidence {action_confidence:.4f} below threshold {effective_threshold}, forcing HOLD")
+                    action = 2  # HOLD
+                else:
+                    # Apply trade action fee to ensure we only trade when there's clear benefit
+                    fee_adjusted_action_values = action_values.clone()
+                    fee_adjusted_action_values[0, 0] -= self.trade_action_fee  # Reduce BUY value 
+                    fee_adjusted_action_values[0, 1] -= self.trade_action_fee  # Reduce SELL value
+                    # Hold value remains unchanged
+                    
+                    # Re-determine the action based on fee-adjusted values
+                    fee_adjusted_action = fee_adjusted_action_values.argmax().item()
+                    
+                    # If the fee changes our decision, log this
+                    if fee_adjusted_action != action:
+                        logger.info(f"Trade action fee changed decision from {action} to {fee_adjusted_action}")
+                        action = fee_adjusted_action
+            
            # Adjust action based on extrema and price predictions
            # Prioritize short-term movement for trading decisions
            if immediate_conf > 0.8:  # Only adjust for strong signals
                if immediate_direction == 2:  # UP prediction
                    # Bias toward BUY for strong up predictions
-                    if action != 0 and random.random() < 0.3 * immediate_conf:
+                    if action != 0 and action != 2 and random.random() < 0.3 * immediate_conf:
                        logger.info(f"Adjusting action to BUY based on immediate UP prediction")
                        action = 0  # BUY
                elif immediate_direction == 0:  # DOWN prediction
                    # Bias toward SELL for strong down predictions
-                    if action != 1 and random.random() < 0.3 * immediate_conf:
+                    if action != 1 and action != 2 and random.random() < 0.3 * immediate_conf:
                        logger.info(f"Adjusting action to SELL based on immediate DOWN prediction")
                        action = 1  # SELL
            
@@ -354,333 +501,217 @@ class DQNAgent:
            if extrema_confidence > 0.8:  # Only adjust for strong signals
                if extrema_class == 0:  # Bottom detected
                    # Bias toward BUY at bottoms
-                    if action != 0 and random.random() < 0.3 * extrema_confidence:
+                    if action != 0 and action != 2 and random.random() < 0.3 * extrema_confidence:
                        logger.info(f"Adjusting action to BUY based on bottom detection")
                        action = 0  # BUY
                elif extrema_class == 1:  # Top detected
                    # Bias toward SELL at tops
-                    if action != 1 and random.random() < 0.3 * extrema_confidence:
+                    if action != 1 and action != 2 and random.random() < 0.3 * extrema_confidence:
                        logger.info(f"Adjusting action to SELL based on top detection")
                        action = 1  # SELL
            
+            # Finally, avoid action oscillation by checking recent history
+            if len(self.recent_actions) >= 2:
+                last_action = self.recent_actions[-1]
+                if action != last_action and action != 2 and last_action != 2:
+                    # We're switching between BUY and SELL too quickly
+                    # Only allow this if we have very high confidence
+                    if action_confidence < 0.85:
+                        logger.info(f"Preventing oscillation from {last_action} to {action}, forcing HOLD")
+                        action = 2  # HOLD
+            
+            # Update recent actions list
+            self.recent_actions.append(action)
+            if len(self.recent_actions) > 5:
+                self.recent_actions = self.recent_actions[-5:]
+            
            return action

-    def replay(self, use_prioritized=True) -> float:
-        """Experience replay - learn from stored experiences
-
-        Args:
-            use_prioritized: Whether to use prioritized experience replay
-            
-        Returns:
-            float: Training loss
-        """
-        # Check if we have enough samples
-        if len(self.memory) < self.batch_size:
+    def replay(self, experiences=None):
+        """Train the model using experiences from memory"""
+        
+        # Don't train if not in training mode
+        if not self.training:
            return 0.0
        
-        # Check if mixed precision should be disabled
-        if 'DISABLE_MIXED_PRECISION' in os.environ:
-            self.use_mixed_precision = False
+        # If no experiences provided, sample from memory
+        if experiences is None:
+            # Skip if memory is too small
+            if len(self.memory) < self.batch_size:
+                return 0.0
            
-        # Sample from memory with or without prioritization
-        if use_prioritized and len(self.positive_memory) > self.batch_size // 4:
-            # Use prioritized sampling: mix normal samples with positive reward samples
-            positive_batch_size = min(self.batch_size // 4, len(self.positive_memory))
-            regular_batch_size = self.batch_size - positive_batch_size
-            
-            # Get positive examples
-            positive_batch = random.sample(self.positive_memory, positive_batch_size)
-            
-            # Get regular examples
-            regular_batch = random.sample(self.memory, regular_batch_size)
-            
-            # Combine batches
-            minibatch = positive_batch + regular_batch
-        else:
-            # Use regular uniform sampling
-            minibatch = random.sample(self.memory, self.batch_size)
+            # Sample random mini-batch from memory
+            indices = np.random.choice(len(self.memory), size=min(self.batch_size, len(self.memory)), replace=False)
+            experiences = [self.memory[i] for i in indices]
        
-        # Extract batches with proper tensor conversion
-        states = np.vstack([self._normalize_state(x[0]) for x in minibatch])
-        actions = np.array([x[1] for x in minibatch])
-        rewards = np.array([x[2] for x in minibatch])
-        next_states = np.vstack([self._normalize_state(x[3]) for x in minibatch])
-        dones = np.array([x[4] for x in minibatch], dtype=np.float32)
-        
-        # Convert to torch tensors and move to device
-        states_tensor = torch.FloatTensor(states).to(self.device)
-        actions_tensor = torch.LongTensor(actions).to(self.device)
-        rewards_tensor = torch.FloatTensor(rewards).to(self.device)
-        next_states_tensor = torch.FloatTensor(next_states).to(self.device)
-        dones_tensor = torch.FloatTensor(dones).to(self.device)
-        
-        # First training step with mixed precision if available
+        # Choose appropriate replay method
        if self.use_mixed_precision:
-            loss = self._replay_mixed_precision(
-                states_tensor, actions_tensor, rewards_tensor, 
-                next_states_tensor, dones_tensor
-            )
+            # Convert experiences to tensors for mixed precision
+            states = torch.FloatTensor(np.array([e[0] for e in experiences])).to(self.device)
+            actions = torch.LongTensor(np.array([e[1] for e in experiences])).to(self.device)
+            rewards = torch.FloatTensor(np.array([e[2] for e in experiences])).to(self.device)
+            next_states = torch.FloatTensor(np.array([e[3] for e in experiences])).to(self.device)
+            dones = torch.FloatTensor(np.array([e[4] for e in experiences])).to(self.device)
+            
+            # Use mixed precision replay
+            loss = self._replay_mixed_precision(states, actions, rewards, next_states, dones)
        else:
-            loss = self._replay_standard(
-                states_tensor, actions_tensor, rewards_tensor,
-                next_states_tensor, dones_tensor
-            )
+            # Pass experiences directly to standard replay method
+            loss = self._replay_standard(experiences)
            
-        # Training focus selector - randomly focus on one of the specialized training types
-        training_focus = random.random()
-        
-        # Occasionally train specifically on extrema points
-        if training_focus < 0.3 and hasattr(self, 'extrema_memory') and len(self.extrema_memory) >= self.batch_size // 2:
-            # Sample from extrema memory
-            extrema_batch_size = min(self.batch_size // 2, len(self.extrema_memory))
-            extrema_batch = random.sample(self.extrema_memory, extrema_batch_size)
-            
-            # Extract batches with proper tensor conversion
-            extrema_states = np.vstack([self._normalize_state(x[0]) for x in extrema_batch])
-            extrema_actions = np.array([x[1] for x in extrema_batch])
-            extrema_rewards = np.array([x[2] for x in extrema_batch])
-            extrema_next_states = np.vstack([self._normalize_state(x[3]) for x in extrema_batch])
-            extrema_dones = np.array([x[4] for x in extrema_batch], dtype=np.float32)
-            
-            # Convert to torch tensors and move to device
-            extrema_states_tensor = torch.FloatTensor(extrema_states).to(self.device)
-            extrema_actions_tensor = torch.LongTensor(extrema_actions).to(self.device)
-            extrema_rewards_tensor = torch.FloatTensor(extrema_rewards).to(self.device)
-            extrema_next_states_tensor = torch.FloatTensor(extrema_next_states).to(self.device)
-            extrema_dones_tensor = torch.FloatTensor(extrema_dones).to(self.device)
-            
-            # Additional training step focused on extrema points (with smaller learning rate)
-            original_lr = self.optimizer.param_groups[0]['lr']
-            # Temporarily reduce learning rate for fine-tuning on extrema
-            for param_group in self.optimizer.param_groups:
-                param_group['lr'] = original_lr * 0.5
-                
-            # Train on extrema
-            if self.use_mixed_precision:
-                extrema_loss = self._replay_mixed_precision(
-                    extrema_states_tensor, extrema_actions_tensor, extrema_rewards_tensor,
-                    extrema_next_states_tensor, extrema_dones_tensor
-                )
-            else:
-                extrema_loss = self._replay_standard(
-                    extrema_states_tensor, extrema_actions_tensor, extrema_rewards_tensor,
-                    extrema_next_states_tensor, extrema_dones_tensor
-                )
-            
-            # Restore original learning rate
-            for param_group in self.optimizer.param_groups:
-                param_group['lr'] = original_lr
-            
-            logger.info(f"Extra training on extrema points: loss={extrema_loss:.4f}")
-            
-            # Average the loss
-            loss = (loss + extrema_loss) / 2
-            
-        # Occasionally train specifically on price movement data
-        elif training_focus >= 0.3 and training_focus < 0.6 and hasattr(self, 'price_movement_memory') and len(self.price_movement_memory) >= self.batch_size // 2:
-            # Sample from price movement memory
-            price_batch_size = min(self.batch_size // 2, len(self.price_movement_memory))
-            price_batch = random.sample(self.price_movement_memory, price_batch_size)
-            
-            # Extract batches with proper tensor conversion
-            price_states = np.vstack([self._normalize_state(x[0]) for x in price_batch])
-            price_actions = np.array([x[1] for x in price_batch])
-            price_rewards = np.array([x[2] for x in price_batch])
-            price_next_states = np.vstack([self._normalize_state(x[3]) for x in price_batch])
-            price_dones = np.array([x[4] for x in price_batch], dtype=np.float32)
-            
-            # Convert to torch tensors and move to device
-            price_states_tensor = torch.FloatTensor(price_states).to(self.device)
-            price_actions_tensor = torch.LongTensor(price_actions).to(self.device)
-            price_rewards_tensor = torch.FloatTensor(price_rewards).to(self.device)
-            price_next_states_tensor = torch.FloatTensor(price_next_states).to(self.device)
-            price_dones_tensor = torch.FloatTensor(price_dones).to(self.device)
-            
-            # Additional training step focused on price movements (with smaller learning rate)
-            original_lr = self.optimizer.param_groups[0]['lr']
-            # Temporarily reduce learning rate
-            for param_group in self.optimizer.param_groups:
-                param_group['lr'] = original_lr * 0.5
-                
-            # Train on price movement data
-            if self.use_mixed_precision:
-                price_loss = self._replay_mixed_precision(
-                    price_states_tensor, price_actions_tensor, price_rewards_tensor,
-                    price_next_states_tensor, price_dones_tensor
-                )
-            else:
-                price_loss = self._replay_standard(
-                    price_states_tensor, price_actions_tensor, price_rewards_tensor,
-                    price_next_states_tensor, price_dones_tensor
-                )
-            
-            # Restore original learning rate
-            for param_group in self.optimizer.param_groups:
-                param_group['lr'] = original_lr
-            
-            logger.info(f"Extra training on price movement data: loss={price_loss:.4f}")
-            
-            # Average the loss
-            loss = (loss + price_loss) / 2
-        
-        # Store and return loss
+        # Store loss for monitoring
        self.losses.append(loss)
-        return loss
-            
-    def _replay_standard(self, states, actions, rewards, next_states, dones):
-        """Standard precision training step"""
-        # Zero gradients
-        self.optimizer.zero_grad()
-        
-        # Get current Q values and extrema predictions
-        current_q_values, current_extrema_pred, current_price_pred = self.policy_net(states)
-        current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
-        
-        # Get next Q values from target network
-        with torch.no_grad():
-            next_q_values, next_extrema_pred, next_price_pred = self.target_net(next_states)
-            next_q_values = next_q_values.max(1)[0]
-            
-            # Check for dimension mismatch and fix it
-            if rewards.shape[0] != next_q_values.shape[0]:
-                # Log the shape mismatch for debugging
-                logger.warning(f"Shape mismatch detected in standard replay: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
-                # Use the smaller size to prevent index errors
-                min_size = min(rewards.shape[0], next_q_values.shape[0])
-                rewards = rewards[:min_size]
-                dones = dones[:min_size]
-                next_q_values = next_q_values[:min_size]
-                current_q_values = current_q_values[:min_size]
-                
-            target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
-        
-        # Compute Q-value loss (primary task)
-        q_loss = nn.MSELoss()(current_q_values, target_q_values)
-        
-        # Initialize combined loss with Q-value loss
-        loss = q_loss
-        
-        # Try to extract price from current and next states
-        try:
-            # Extract price feature from sequence data (if available)
-            if len(states.shape) == 3:  # [batch, seq, features]
-                current_prices = states[:, -1, -1]  # Last timestep, last feature
-                next_prices = next_states[:, -1, -1]
-            else:  # [batch, features]
-                current_prices = states[:, -1]  # Last feature
-                next_prices = next_states[:, -1]
-                
-            # Compute price changes for different timeframes
-            immediate_changes = (next_prices - current_prices) / current_prices
-            
-            # Create price direction labels - simplified for training
-            # 0 = down, 1 = sideways, 2 = up
-            immediate_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 1  # Default: sideways
-            midterm_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 1
-            longterm_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 1
-            
-            # Immediate term direction (1s, 1m)
-            immediate_up = (immediate_changes > 0.0005)
-            immediate_down = (immediate_changes < -0.0005)
-            immediate_labels[immediate_up] = 2    # Up
-            immediate_labels[immediate_down] = 0  # Down
-            
-            # For mid and long term, we can only approximate during training
-            # In a real system, we'd need historical data to validate these
-            # Here we'll use the immediate term with increasing thresholds as approximation
-            
-            # Mid-term (1h) - use slightly higher threshold
-            midterm_up = (immediate_changes > 0.001)
-            midterm_down = (immediate_changes < -0.001)
-            midterm_labels[midterm_up] = 2    # Up
-            midterm_labels[midterm_down] = 0  # Down
-            
-            # Long-term (1d) - use even higher threshold
-            longterm_up = (immediate_changes > 0.002)
-            longterm_down = (immediate_changes < -0.002)
-            longterm_labels[longterm_up] = 2    # Up
-            longterm_labels[longterm_down] = 0  # Down
-            
-            # Generate target values for price change regression
-            # For simplicity, we'll use the immediate change and scaled versions for longer timeframes
-            price_value_targets = torch.zeros((min_size, 4), device=self.device)
-            price_value_targets[:, 0] = immediate_changes
-            price_value_targets[:, 1] = immediate_changes * 2.0  # Approximate 1h change
-            price_value_targets[:, 2] = immediate_changes * 4.0  # Approximate 1d change
-            price_value_targets[:, 3] = immediate_changes * 6.0  # Approximate 1w change
-            
-            # Calculate loss for price direction prediction (classification)
-            if len(current_price_pred['immediate'].shape) > 1 and current_price_pred['immediate'].shape[0] >= min_size:
-                # Slice predictions to match the adjusted batch size
-                immediate_pred = current_price_pred['immediate'][:min_size]
-                midterm_pred = current_price_pred['midterm'][:min_size]
-                longterm_pred = current_price_pred['longterm'][:min_size]
-                price_values_pred = current_price_pred['values'][:min_size]
-                
-                # Compute losses for each task
-                immediate_loss = nn.CrossEntropyLoss()(immediate_pred, immediate_labels)
-                midterm_loss = nn.CrossEntropyLoss()(midterm_pred, midterm_labels)
-                longterm_loss = nn.CrossEntropyLoss()(longterm_pred, longterm_labels)
-                
-                # MSE loss for price value regression
-                price_value_loss = nn.MSELoss()(price_values_pred, price_value_targets)
-                
-                # Combine all price prediction losses
-                price_loss = immediate_loss + 0.7 * midterm_loss + 0.5 * longterm_loss + 0.3 * price_value_loss
-                
-                # Create extrema labels (same as before)
-                extrema_labels = torch.ones(min_size, dtype=torch.long, device=self.device) * 2  # Default: neither
-                
-                # Identify potential bottoms (significant negative change)
-                bottoms = (immediate_changes < -0.003)
-                extrema_labels[bottoms] = 0
-                
-                # Identify potential tops (significant positive change)
-                tops = (immediate_changes > 0.003)
-                extrema_labels[tops] = 1
-                
-                # Calculate extrema prediction loss
-                if len(current_extrema_pred.shape) > 1 and current_extrema_pred.shape[0] >= min_size:
-                    current_extrema_pred = current_extrema_pred[:min_size]
-                    extrema_loss = nn.CrossEntropyLoss()(current_extrema_pred, extrema_labels)
-                    
-                    # Combined loss with all components
-                    # Primary task: Q-value learning (RL objective)
-                    # Secondary tasks: extrema detection and price prediction (supervised objectives)
-                    loss = q_loss + 0.3 * extrema_loss + 0.3 * price_loss
-                    
-                    # Log loss components occasionally
-                    if random.random() < 0.01:  # Log 1% of the time
-                        logger.info(
-                            f"Training losses: Q-loss={q_loss.item():.4f}, "
-                            f"Extrema-loss={extrema_loss.item():.4f}, "
-                            f"Price-loss={price_loss.item():.4f}, "
-                            f"Imm-loss={immediate_loss.item():.4f}, "
-                            f"Mid-loss={midterm_loss.item():.4f}, "
-                            f"Long-loss={longterm_loss.item():.4f}"
-                        )
-        except Exception as e:
-            # Fallback if price extraction fails
-            logger.warning(f"Failed to calculate price prediction loss: {str(e)}. Using only Q-value loss.")
-            # Just use Q-value loss
-            loss = q_loss
-        
-        # Backward pass and optimize
-        loss.backward()
-        
-        # Gradient clipping to prevent exploding gradients
-        torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
-        self.optimizer.step()
-        
-        # Update target network if needed
-        self.update_count += 1
-        if self.update_count % self.target_update == 0:
-            self.target_net.load_state_dict(self.policy_net.state_dict())
        
        # Track and decay epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        
-        return loss.item()
+        # Randomly decide if we should train on extrema points from special memory
+        if random.random() < 0.3 and len(self.extrema_memory) >= self.batch_size:
+            # Train specifically on extrema memory examples
+            extrema_indices = np.random.choice(len(self.extrema_memory), size=min(self.batch_size, len(self.extrema_memory)), replace=False)
+            extrema_batch = [self.extrema_memory[i] for i in extrema_indices]
+            
+            # Extract tensors from extrema batch
+            extrema_states = torch.FloatTensor(np.array([e[0] for e in extrema_batch])).to(self.device)
+            extrema_actions = torch.LongTensor(np.array([e[1] for e in extrema_batch])).to(self.device)
+            extrema_rewards = torch.FloatTensor(np.array([e[2] for e in extrema_batch])).to(self.device)
+            extrema_next_states = torch.FloatTensor(np.array([e[3] for e in extrema_batch])).to(self.device)
+            extrema_dones = torch.FloatTensor(np.array([e[4] for e in extrema_batch])).to(self.device)
+            
+            # Use a slightly reduced learning rate for extrema training
+            old_lr = self.optimizer.param_groups[0]['lr']
+            self.optimizer.param_groups[0]['lr'] = old_lr * 0.8
+            
+            # Train on extrema memory
+            if self.use_mixed_precision:
+                extrema_loss = self._replay_mixed_precision(extrema_states, extrema_actions, extrema_rewards, extrema_next_states, extrema_dones)
+            else:
+                extrema_loss = self._replay_standard(extrema_batch)
+            
+            # Reset learning rate
+            self.optimizer.param_groups[0]['lr'] = old_lr
+            
+            # Log extrema loss 
+            logger.info(f"Extra training on extrema points, loss: {extrema_loss:.4f}")
+        
+        # Randomly train on price movement examples (similar to extrema)
+        if random.random() < 0.3 and len(self.price_movement_memory) >= self.batch_size:
+            # Train specifically on price movement memory examples
+            price_indices = np.random.choice(len(self.price_movement_memory), size=min(self.batch_size, len(self.price_movement_memory)), replace=False)
+            price_batch = [self.price_movement_memory[i] for i in price_indices]
+            
+            # Extract tensors from price movement batch
+            price_states = torch.FloatTensor(np.array([e[0] for e in price_batch])).to(self.device)
+            price_actions = torch.LongTensor(np.array([e[1] for e in price_batch])).to(self.device)
+            price_rewards = torch.FloatTensor(np.array([e[2] for e in price_batch])).to(self.device)
+            price_next_states = torch.FloatTensor(np.array([e[3] for e in price_batch])).to(self.device)
+            price_dones = torch.FloatTensor(np.array([e[4] for e in price_batch])).to(self.device)
+            
+            # Use a slightly reduced learning rate for price movement training
+            old_lr = self.optimizer.param_groups[0]['lr']
+            self.optimizer.param_groups[0]['lr'] = old_lr * 0.75
+            
+            # Train on price movement memory
+            if self.use_mixed_precision:
+                price_loss = self._replay_mixed_precision(price_states, price_actions, price_rewards, price_next_states, price_dones)
+            else:
+                price_loss = self._replay_standard(price_batch)
+            
+            # Reset learning rate
+            self.optimizer.param_groups[0]['lr'] = old_lr
+            
+            # Log price movement loss 
+            logger.info(f"Extra training on price movement examples, loss: {price_loss:.4f}")
+        
+        return loss
+
+    def _replay_standard(self, experiences=None):
+        """Run one full-precision DQN update and return the resulting loss.
+
+        Args:
+            experiences: Optional sequence of ``(state, action, reward,
+                next_state, done)`` tuples. When ``None``, ``self.batch_size``
+                transitions are sampled without replacement from ``self.memory``.
+
+        Returns:
+            float: Combined loss (Q-value loss plus 0.1 * auxiliary extrema
+            loss). ``0.0`` is returned when there is nothing to train on or
+            when the update raised an exception (the error is logged).
+        """
+        try:
+            if experiences is None:
+                # Not enough stored transitions for a full batch yet
+                if len(self.memory) < self.batch_size:
+                    return 0.0
+
+                indices = np.random.choice(len(self.memory), size=self.batch_size, replace=False)
+                experiences = [self.memory[i] for i in indices]
+            elif len(experiences) == 0:
+                # An empty batch cannot be unpacked below; treat it as a no-op
+                return 0.0
+
+            # Unpack experiences and convert each column to a tensor on the agent's device
+            states, actions, rewards, next_states, dones = zip(*experiences)
+            states = torch.FloatTensor(np.array(states)).to(self.device)
+            actions = torch.LongTensor(np.array(actions)).to(self.device)
+            rewards = torch.FloatTensor(np.array(rewards)).to(self.device)
+            next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
+            dones = torch.FloatTensor(np.array(dones)).to(self.device)
+
+            # Q(s, a) for the actions that were actually taken
+            current_q_values, current_extrema_pred, _, _ = self.policy_net(states)
+            current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
+
+            # Greedy next-state value from the target network (no gradients needed)
+            with torch.no_grad():
+                next_q_values, _, _, _ = self.target_net(next_states)
+                next_q_values = next_q_values.max(1)[0]
+
+            # Reconcile batch sizes OUTSIDE no_grad: slicing current_q_values inside
+            # the no_grad block would detach it and silently stop the Q-loss from
+            # training the policy network.
+            if rewards.shape[0] != next_q_values.shape[0]:
+                logger.warning(f"Shape mismatch detected in standard replay: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
+                min_size = min(rewards.shape[0], next_q_values.shape[0])
+                rewards = rewards[:min_size]
+                dones = dones[:min_size]
+                next_q_values = next_q_values[:min_size]
+                current_q_values = current_q_values[:min_size]
+
+            # One-step TD target; none of its inputs carry gradients
+            target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
+
+            # Compute loss for Q value
+            q_loss = self.criterion(current_q_values, target_q_values)
+
+            # Auxiliary extrema loss (best effort; falls back to the Q-loss alone)
+            try:
+                # NOTE(review): the targets are the argmax of the very predictions
+                # being scored, so this term only rewards confident outputs.
+                # Confirm that self-labelling is intended here.
+                extrema_targets = torch.argmax(current_extrema_pred, dim=1).long()
+                extrema_loss = F.cross_entropy(current_extrema_pred, extrema_targets)
+
+                # Combined loss with emphasis on Q-learning
+                total_loss = q_loss + 0.1 * extrema_loss
+            except Exception as e:
+                logger.warning(f"Failed to calculate extrema loss: {str(e)}. Using only Q-value loss.")
+                total_loss = q_loss
+
+            # Standard optimisation step with gradient-norm clipping
+            self.optimizer.zero_grad()
+            total_loss.backward()
+            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+            self.optimizer.step()
+
+            # Periodically sync the target network with the policy network
+            self.update_count += 1
+            if self.update_count % self.target_update == 0:
+                self.target_net.load_state_dict(self.policy_net.state_dict())
+
+            return total_loss.item()
+        except Exception as e:
+            # Training is best effort: log the failure and report a zero loss
+            logger.error(f"Error in replay standard: {str(e)}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return 0.0
    
    def _replay_mixed_precision(self, states, actions, rewards, next_states, dones):
        """Mixed precision training step for better GPU performance"""
@@ -696,12 +727,12 @@ class DQNAgent:
            # Forward pass with amp autocasting
            with torch.cuda.amp.autocast():
                # Get current Q values and extrema predictions
-                current_q_values, current_extrema_pred, current_price_pred = self.policy_net(states)
+                current_q_values, current_extrema_pred, current_price_pred, hidden_features = self.policy_net(states)
                current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
                
                # Get next Q values from target network
                with torch.no_grad():
-                    next_q_values, next_extrema_pred, next_price_pred = self.target_net(next_states)
+                    next_q_values, next_extrema_pred, next_price_pred, next_hidden_features = self.target_net(next_states)
                    next_q_values = next_q_values.max(1)[0]
                    
                    # Check for dimension mismatch and fix it
@@ -733,7 +764,7 @@ class DQNAgent:
                        current_prices = states[:, -1]  # Last feature
                        next_prices = next_states[:, -1]
                    
-                    # Compute price changes for different timeframes
+                    # Calculate price change for different timeframes
                    immediate_changes = (next_prices - current_prices) / current_prices
                    
                    # Create price direction labels - simplified for training
--- a/NN/models/dqn_agent_enhanced.py
+++ b/NN/models/dqn_agent_enhanced.py
@@ -0,0 +1,329 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+from collections import deque
+import random
+from typing import Tuple, List
+import os
+import sys
+import logging
+import torch.nn.functional as F
+
+# Add the directory two levels above this file's folder to sys.path so the absolute `NN.models` import below can resolve
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+# Import the network (EnhancedCNN) and replay buffer (ExampleSiftingDataset) used by the agent below
+from NN.models.enhanced_cnn import EnhancedCNN, ExampleSiftingDataset
+
+# Module-level logger named after this module
+logger = logging.getLogger(__name__)
+
+class EnhancedDQNAgent:
+    """
+    Enhanced Deep Q-Network agent for trading
+    Uses the improved EnhancedCNN model with residual connections and attention mechanisms
+    """
+    def __init__(self,
+                 state_shape: Tuple[int, ...],
+                 n_actions: int,
+                 learning_rate: float = 0.0003,  # Slightly reduced learning rate for stability
+                 gamma: float = 0.95,            # Discount factor
+                 epsilon: float = 1.0,
+                 epsilon_min: float = 0.05,
+                 epsilon_decay: float = 0.995,   # Slower decay for more exploration
+                 buffer_size: int = 50000,       # Larger memory buffer
+                 batch_size: int = 128,          # Larger batch size
+                 target_update: int = 10,        # More frequent target updates
+                 confidence_threshold: float = 0.4,  # Lower confidence threshold
+                 device=None):
+        """Build the policy/target networks, optimizer, replay memory and trackers.
+
+        Args:
+            state_shape: Shape of one observation. A tuple with more than one
+                entry is passed to the network unchanged; a one-element tuple
+                is reduced to its single entry; any other value is used as is.
+            n_actions: Number of discrete actions.
+            learning_rate: Adam learning rate.
+            gamma: Discount factor used for the TD target.
+            epsilon: Initial exploration rate.
+            epsilon_min: Lower bound for the exploration rate.
+            epsilon_decay: Multiplicative decay applied to epsilon per replay.
+            buffer_size: Maximum number of examples kept in the replay memory.
+            batch_size: Number of transitions used per replay step.
+            target_update: Number of replay steps between target-network syncs.
+            confidence_threshold: Threshold forwarded to ``EnhancedCNN``.
+            device: ``torch.device`` or device string. Defaults to CUDA when
+                available, otherwise CPU.
+        """
+        # Extract state dimensions (multi-dimensional shapes stay tuples)
+        if isinstance(state_shape, tuple):
+            self.state_dim = state_shape if len(state_shape) > 1 else state_shape[0]
+        else:
+            self.state_dim = state_shape
+
+        # Store parameters
+        self.n_actions = n_actions
+        self.learning_rate = learning_rate
+        self.gamma = gamma
+        self.epsilon = epsilon
+        self.epsilon_min = epsilon_min
+        self.epsilon_decay = epsilon_decay
+        self.buffer_size = buffer_size
+        self.batch_size = batch_size
+        self.target_update = target_update
+        self.confidence_threshold = confidence_threshold
+
+        # Set device for computation; torch.device() also normalises strings such as 'cpu'
+        if device is None:
+            device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.device = torch.device(device)
+
+        # Initialize models with the enhanced CNN. The constructor receives no
+        # device argument, so place both networks explicitly on the device that
+        # replay() sends its batches to.
+        self.policy_net = EnhancedCNN(self.state_dim, self.n_actions, self.confidence_threshold).to(self.device)
+        self.target_net = EnhancedCNN(self.state_dim, self.n_actions, self.confidence_threshold).to(self.device)
+
+        # Initialize the target network with the same weights as the policy network
+        self.target_net.load_state_dict(self.policy_net.state_dict())
+
+        # Only the target network is put in eval mode here; replay() switches
+        # the policy network to train mode before each update
+        self.target_net.eval()
+
+        # Optimization components
+        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=self.learning_rate)
+        self.criterion = nn.MSELoss()
+
+        # Experience replay memory with example sifting
+        self.memory = ExampleSiftingDataset(max_examples=buffer_size)
+        self.update_count = 0
+
+        # Confidence tracking
+        self.confidence_history = []
+        self.avg_confidence = 0.0
+        self.max_confidence = 0.0
+        self.min_confidence = 1.0
+
+        # Performance tracking
+        self.losses = []
+        self.rewards = []
+        self.avg_reward = 0.0
+
+        # Mixed precision is tied to the device this agent actually trains on:
+        # a CUDA GradScaler cannot scale losses that live on the CPU, so a CPU
+        # agent on a CUDA-capable host must not enable it.
+        amp_allowed = hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ
+        self.use_mixed_precision = self.device.type == 'cuda' and amp_allowed
+        if self.use_mixed_precision:
+            self.scaler = torch.cuda.amp.GradScaler()
+            logger.info("Mixed precision training enabled")
+        else:
+            logger.info("Mixed precision training disabled")
+
+        # For compatibility with old code
+        self.action_size = n_actions
+
+        logger.info(f"Enhanced DQN Agent using device: {self.device}")
+        logger.info(f"Confidence threshold set to {self.confidence_threshold}")
+        
+    def move_models_to_device(self, device=None):
+        """Relocate the policy and target networks to a device.
+
+        Args:
+            device: Target device. When given it replaces ``self.device``;
+                when ``None`` the agent's current device is used.
+
+        Returns:
+            bool: ``True`` if both networks were moved, ``False`` if moving
+            raised an exception (the failure is logged).
+        """
+        if device is not None:
+            self.device = device
+
+        try:
+            # Policy first, then target - same order as the attributes are created
+            for attr_name in ("policy_net", "target_net"):
+                setattr(self, attr_name, getattr(self, attr_name).to(self.device))
+            logger.info(f"Moved models to {self.device}")
+        except Exception as e:
+            logger.error(f"Failed to move models to {self.device}: {str(e)}")
+            return False
+        return True
+    
+    def _normalize_state(self, state):
+        """Return a standardized copy of ``state`` without modifying the input.
+
+        A multi-dimensional state is standardized slice by slice along its
+        first axis; a 1-D state is standardized as a whole. Slices whose
+        standard deviation is not positive (constant or all-zero data) are
+        left unchanged, which also avoids division by zero.
+
+        Args:
+            state: List or array-like observation.
+
+        Returns:
+            A floating-point NumPy array holding the normalized values, or the
+            original ``state`` object if normalization raised an exception.
+        """
+        try:
+            # Always work on a private floating-point copy: writing the result
+            # back into the caller's array would corrupt the raw observation
+            # (which the caller may still store or reuse), and an integer
+            # array would truncate the standardized values.
+            if isinstance(state, list):
+                normalized = np.array(state, dtype=np.float32)
+            else:
+                normalized = np.array(state)
+                if not np.issubdtype(normalized.dtype, np.floating):
+                    normalized = normalized.astype(np.float32)
+
+            if normalized.ndim > 1:
+                # Multi-dimensional state - normalize each leading-axis slice separately
+                for i in range(normalized.shape[0]):
+                    std = np.std(normalized[i])
+                    if std > 0:
+                        normalized[i] = (normalized[i] - np.mean(normalized[i])) / std
+            else:
+                # 1D state vector
+                std = np.std(normalized)
+                if std > 0:
+                    normalized = (normalized - np.mean(normalized)) / std
+
+            return normalized
+        except Exception as e:
+            # Best effort: fall back to the untouched input
+            logger.warning(f"Error normalizing state: {str(e)}")
+            return state
+        
+    def remember(self, state, action, reward, next_state, done):
+        """Add one transition to the replay memory and refresh the reward statistics.
+
+        The transition is handed to ``self.memory.add_example``; the reward is
+        also appended to a rolling window of the 100 most recent rewards whose
+        mean is kept in ``self.avg_reward``.
+        """
+        self.memory.add_example(state, action, reward, next_state, done)
+
+        # Rolling reward window used only for monitoring
+        recent = self.rewards
+        recent.append(reward)
+        if len(recent) > 100:
+            recent = recent[-100:]
+            self.rewards = recent
+        self.avg_reward = np.mean(recent)
+    
+    def act(self, state, explore=True):
+        """Pick an action with an epsilon-greedy policy and report its confidence.
+
+        Args:
+            state: Current observation; it is normalized before inference.
+            explore: When true, a uniformly random action is returned with
+                probability ``self.epsilon``.
+
+        Returns:
+            tuple: ``(action, confidence)``. Random exploratory actions carry a
+            confidence of ``0.0``; otherwise both values come from
+            ``self.policy_net.act``.
+        """
+        take_random_action = explore and random.random() < self.epsilon
+        if take_random_action:
+            # Exploration step: random action, zero confidence
+            return random.randrange(self.n_actions), 0.0
+
+        # Greedy step: delegate to the network's own act(), which receives the normalized state
+        action, confidence = self.policy_net.act(self._normalize_state(state), explore=explore)
+
+        # Keep a rolling window of the last 100 confidence values
+        window = self.confidence_history
+        window.append(confidence)
+        if len(window) > 100:
+            window = window[-100:]
+            self.confidence_history = window
+
+        # Update confidence metrics
+        self.avg_confidence = sum(window) / len(window)
+        self.max_confidence = max(self.max_confidence, confidence)
+        self.min_confidence = min(self.min_confidence, confidence)
+
+        # Log the metrics for roughly 1% of the greedy decisions
+        if random.random() < 0.01:
+            logger.info(
+                f"Confidence metrics - Current: {confidence:.4f}, Avg: {self.avg_confidence:.4f}, "
+                f"Min: {self.min_confidence:.4f}, Max: {self.max_confidence:.4f}"
+            )
+
+        return action, confidence
+    
+    def replay(self):
+        """Sample a batch from memory and perform one Q-learning update.
+
+        Returns:
+            float: The loss of this update, or ``0.0`` when the memory holds
+            fewer than ``self.batch_size`` examples or yields no batch.
+        """
+        # Check if enough samples in memory
+        if len(self.memory) < self.batch_size:
+            return 0.0
+
+        # Get batch of experiences
+        batch = self.memory.get_batch(self.batch_size)
+        if batch is None:
+            return 0.0
+
+        states = torch.FloatTensor(batch['states']).to(self.device)
+        actions = torch.LongTensor(batch['actions']).to(self.device)
+        rewards = torch.FloatTensor(batch['rewards']).to(self.device)
+        next_states = torch.FloatTensor(batch['next_states']).to(self.device)
+        dones = torch.FloatTensor(batch['dones']).to(self.device)
+
+        # Policy network trains, target network only evaluates
+        self.policy_net.train()
+        self.target_net.eval()
+
+        self.optimizer.zero_grad()
+        if self.use_mixed_precision:
+            # Forward pass under autocast; backward/step go through the gradient scaler
+            with torch.cuda.amp.autocast():
+                loss = self._td_loss(states, actions, rewards, next_states, dones)
+            self.scaler.scale(loss).backward()
+            # Unscale before clipping so the norm threshold applies to the true gradients
+            self.scaler.unscale_(self.optimizer)
+            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+            self.scaler.step(self.optimizer)
+            self.scaler.update()
+        else:
+            # Standard precision training
+            loss = self._td_loss(states, actions, rewards, next_states, dones)
+            loss.backward()
+            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+            self.optimizer.step()
+
+        # Track loss (rolling window of the last 100 values)
+        loss_value = loss.item()
+        self.losses.append(loss_value)
+        if len(self.losses) > 100:
+            self.losses = self.losses[-100:]
+
+        # Update target network
+        self.update_count += 1
+        if self.update_count % self.target_update == 0:
+            self.target_net.load_state_dict(self.policy_net.state_dict())
+            logger.info(f"Updated target network (step {self.update_count})")
+
+        # Decay epsilon, clamped so it can never undershoot epsilon_min
+        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
+
+        return loss_value
+
+    def _td_loss(self, states, actions, rewards, next_states, dones):
+        """Return the criterion loss between Q(s, a) and the one-step TD target."""
+        # Q-values of the actions that were actually taken
+        q_values, _, _, _ = self.policy_net(states)
+        current_q = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
+
+        # Target uses the greedy next-state value from the target network;
+        # terminal transitions (dones == 1) contribute the reward only
+        with torch.no_grad():
+            next_q_values, _, _, _ = self.target_net(next_states)
+            next_q = next_q_values.max(1)[0]
+            target_q = rewards + (1 - dones) * self.gamma * next_q
+
+        return self.criterion(current_q, target_q)
+    
+    def save(self, path):
+        """Save both networks and the agent's bookkeeping state.
+
+        The networks are written through their own ``save`` methods to
+        ``{path}_policy`` and ``{path}_target``; the agent state goes to
+        ``{path}_agent_state.pt``.
+
+        Args:
+            path: Common path prefix for all files written.
+        """
+        def _as_float(value):
+            """Convert NumPy/tensor scalars to a built-in float where possible."""
+            try:
+                return float(value)
+            except (TypeError, ValueError):
+                return value
+
+        self.policy_net.save(f"{path}_policy")
+        self.target_net.save(f"{path}_target")
+
+        # Store plain Python numbers only: values such as avg_reward come from
+        # np.mean and are NumPy scalars, which a weights-only torch.load
+        # refuses to unpickle, so the saved state could not be restored.
+        torch.save({
+            'epsilon': _as_float(self.epsilon),
+            'confidence_threshold': _as_float(self.confidence_threshold),
+            'losses': [_as_float(v) for v in self.losses],
+            'rewards': [_as_float(v) for v in self.rewards],
+            'avg_reward': _as_float(self.avg_reward),
+            'confidence_history': [_as_float(v) for v in self.confidence_history],
+            'avg_confidence': _as_float(self.avg_confidence),
+            'max_confidence': _as_float(self.max_confidence),
+            'min_confidence': _as_float(self.min_confidence),
+            'update_count': self.update_count
+        }, f"{path}_agent_state.pt")
+
+        logger.info(f"Agent state saved to {path}_agent_state.pt")
+    
+    def load(self, path):
+        """Load both networks and, if present, the agent's bookkeeping state.
+
+        Args:
+            path: Common path prefix that was used with ``save``.
+
+        Returns:
+            The result of ``policy_loaded and target_loaded``, i.e. whether both
+            network ``load`` calls reported success. A missing or unreadable
+            agent-state file does not affect the return value; read errors
+            are logged.
+        """
+        policy_loaded = self.policy_net.load(f"{path}_policy")
+        target_loaded = self.target_net.load(f"{path}_target")
+
+        # Load agent state if available
+        agent_state_path = f"{path}_agent_state.pt"
+        if os.path.exists(agent_state_path):
+            try:
+                # map_location lets a state saved on a CUDA host be restored on
+                # whatever device this agent runs on
+                state = torch.load(agent_state_path, map_location=self.device)
+
+                self.epsilon = state.get('epsilon', self.epsilon)
+                self.confidence_threshold = state.get('confidence_threshold', self.confidence_threshold)
+                # Keep both networks in sync with the restored threshold
+                self.policy_net.confidence_threshold = self.confidence_threshold
+                self.target_net.confidence_threshold = self.confidence_threshold
+
+                # Remaining trackers: (attribute/key name, default when absent)
+                tracked_defaults = (
+                    ('losses', []),
+                    ('rewards', []),
+                    ('avg_reward', 0.0),
+                    ('confidence_history', []),
+                    ('avg_confidence', 0.0),
+                    ('max_confidence', 0.0),
+                    ('min_confidence', 1.0),
+                    ('update_count', 0),
+                )
+                for key, default in tracked_defaults:
+                    setattr(self, key, state.get(key, default))
+
+                logger.info(f"Agent state loaded from {agent_state_path}")
+            except Exception as e:
+                logger.error(f"Error loading agent state: {str(e)}")
+
+        return policy_loaded and target_loaded
--- a/NN/models/enhanced_cnn.py
+++ b/NN/models/enhanced_cnn.py
@@ -0,0 +1,413 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+import os
+import logging
+import torch.nn.functional as F
+from typing import List, Tuple, Dict, Any, Optional, Union
+
+# Configure logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class ResidualBlock(nn.Module):
+    """
+    Pre-activation residual unit for 1D feature maps.
+
+    Two BatchNorm -> ReLU -> Conv1d (kernel size 3) stages are summed with a
+    shortcut branch. The shortcut is a 1x1 convolution when the stride or the
+    channel count changes, and an empty ``nn.Sequential`` otherwise.
+    """
+    def __init__(self, in_channels, out_channels, stride=1):
+        super().__init__()
+        self.bn1 = nn.BatchNorm1d(in_channels)
+        self.conv1 = nn.Conv1d(
+            in_channels, out_channels,
+            kernel_size=3, stride=stride, padding=1, bias=False,
+        )
+        self.bn2 = nn.BatchNorm1d(out_channels)
+        self.conv2 = nn.Conv1d(
+            out_channels, out_channels,
+            kernel_size=3, stride=1, padding=1, bias=False,
+        )
+
+        # A projection is only needed when the main path changes the tensor shape
+        needs_projection = stride != 1 or in_channels != out_channels
+        projection = (
+            [nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)]
+            if needs_projection else []
+        )
+        self.shortcut = nn.Sequential(*projection)
+
+    def forward(self, x):
+        # The shortcut branches off after the first normalisation + activation
+        activated = F.relu(self.bn1(x))
+        residual = self.shortcut(activated)
+        hidden = self.conv1(activated)
+        hidden = self.conv2(F.relu(self.bn2(hidden)))
+        hidden += residual
+        return hidden
+
+class SelfAttention(nn.Module):
+    """
+    Scaled dot-product self-attention with learned query/key/value projections
+    """
+    def __init__(self, dim):
+        super().__init__()
+        self.query = nn.Linear(dim, dim)
+        self.key = nn.Linear(dim, dim)
+        self.value = nn.Linear(dim, dim)
+        # sqrt(dim), the divisor applied to the raw dot products
+        self.scale = torch.tensor(dim, dtype=torch.float32).sqrt()
+
+    def forward(self, x):
+        """Return ``(attended, weights)`` for x of shape [batch_size, seq_len, dim]"""
+        # Unpacking doubles as a check that the input is 3-dimensional
+        _batch, _steps, _width = x.size()
+
+        # Each projection keeps the shape [batch_size, seq_len, dim]
+        queries, keys, values = self.query(x), self.key(x), self.value(x)
+
+        # Pairwise similarity, normalised over the last axis:
+        # [batch_size, seq_len, seq_len]
+        weights = torch.softmax(queries @ keys.transpose(-2, -1) / self.scale, dim=-1)
+
+        # Weighted sum of the values: [batch_size, seq_len, dim]
+        return weights @ values, weights
+
+class EnhancedCNN(nn.Module):
+    """
+    Enhanced CNN model with residual connections and attention mechanisms
+    for improved trading decision making.
+
+    Inputs with more than one channel go through a 1D residual convolution
+    stack; single-channel inputs are fed straight to the fully connected
+    layers. A shared 256-wide feature vector then drives a dueling Q-value
+    head, an extrema head and four price-prediction heads.
+
+    Args:
+        input_shape: int, or a list/tuple with 1 ([features]), 2
+            ([timeframes, features]) or 3 ([channels, height, width]) entries.
+        n_actions: number of discrete actions (width of the Q-value output).
+        confidence_threshold: minimum softmax probability ``act`` requires
+            before it returns the greedy action instead of HOLD.
+
+    Raises:
+        ValueError: if ``input_shape`` is a sequence of unsupported length.
+    """
+    # Action index returned by ``act`` when confidence is below the threshold.
+    # NOTE(review): assumes the action space has at least 3 entries and that
+    # index 2 means HOLD - confirm against the environment.
+    HOLD_ACTION = 2
+
+    def __init__(self, input_shape, n_actions, confidence_threshold=0.5):
+        super().__init__()
+
+        # Store dimensions
+        self.input_shape = input_shape
+        self.n_actions = n_actions
+        self.confidence_threshold = confidence_threshold
+
+        # Calculate input dimensions
+        self._set_dimensions(input_shape)
+
+        # Build network
+        self._build_network()
+
+        # Initialize device
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.to(self.device)
+
+        logger.info(f"EnhancedCNN initialized with input shape: {input_shape}, actions: {n_actions}")
+
+    def _set_dimensions(self, input_shape):
+        """Derive ``channels`` and ``feature_dim`` (plus shape-specific extras) from input_shape"""
+        if isinstance(input_shape, (list, tuple)):
+            if len(input_shape) == 3:  # [channels, height, width]
+                self.channels, self.height, self.width = input_shape
+                self.feature_dim = self.height * self.width
+            elif len(input_shape) == 2:  # [timeframes, features]
+                self.channels = input_shape[0]
+                self.features = input_shape[1]
+                self.feature_dim = self.features * self.channels
+            elif len(input_shape) == 1:  # [features]
+                self.channels = 1
+                self.features = input_shape[0]
+                self.feature_dim = self.features
+            else:
+                raise ValueError(f"Unsupported input shape: {input_shape}")
+        else:  # single integer
+            self.channels = 1
+            self.features = input_shape
+            self.feature_dim = input_shape
+
+    def _build_network(self):
+        """
+        Build (or rebuild) every layer from ``channels``, ``feature_dim`` and ``n_actions``.
+
+        All layers are created fresh, so calling this discards existing weights.
+        """
+        # 1D CNN for sequential data
+        if self.channels > 1:
+            # Expected input: [batch, timeframes, features]; timeframes act as
+            # the Conv1d channel axis.
+            self.conv_layers = nn.Sequential(
+                nn.Conv1d(self.channels, 64, kernel_size=3, padding=1),
+                nn.BatchNorm1d(64),
+                nn.ReLU(),
+                nn.Dropout(0.2),
+
+                ResidualBlock(64, 128),
+                nn.MaxPool1d(kernel_size=2, stride=2),
+                nn.Dropout(0.3),
+
+                ResidualBlock(128, 256),
+                nn.MaxPool1d(kernel_size=2, stride=2),
+                nn.Dropout(0.4),
+
+                ResidualBlock(256, 512),
+                nn.AdaptiveAvgPool1d(1)  # Global average pooling
+            )
+            # Feature dimension after conv layers
+            self.conv_features = 512
+        else:
+            # For 1D vectors, skip the convolutional part
+            self.conv_layers = None
+            self.conv_features = 0
+
+        # The first FC layer consumes either the raw feature vector or the
+        # pooled convolution output.
+        fc_in = self.feature_dim if self.conv_layers is None else self.conv_features
+        self.fc1 = nn.Linear(fc_in, 512)
+        self.features_dim = 512
+
+        # Common feature extraction layers. fc1 is registered both as an
+        # attribute and as the first entry here, so it appears under two
+        # state_dict prefixes; keep both for checkpoint compatibility.
+        self.fc_layers = nn.Sequential(
+            self.fc1,
+            nn.ReLU(),
+            nn.Dropout(0.4),
+            nn.Linear(512, 512),
+            nn.ReLU(),
+            nn.Dropout(0.4),
+            nn.Linear(512, 256),
+            nn.ReLU()
+        )
+
+        # Dueling architecture
+        self.advantage_stream = nn.Sequential(
+            nn.Linear(256, 128),
+            nn.ReLU(),
+            nn.Linear(128, self.n_actions)
+        )
+
+        self.value_stream = nn.Sequential(
+            nn.Linear(256, 128),
+            nn.ReLU(),
+            nn.Linear(128, 1)
+        )
+
+        # Extrema detection head with increased capacity
+        self.extrema_head = nn.Sequential(
+            nn.Linear(256, 128),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(128, 3)  # 0=bottom, 1=top, 2=neither
+        )
+
+        # Price prediction heads with increased capacity
+        self.price_pred_immediate = nn.Sequential(
+            nn.Linear(256, 64),
+            nn.ReLU(),
+            nn.Linear(64, 3)  # Up, Down, Sideways
+        )
+
+        self.price_pred_midterm = nn.Sequential(
+            nn.Linear(256, 64),
+            nn.ReLU(),
+            nn.Linear(64, 3)  # Up, Down, Sideways
+        )
+
+        self.price_pred_longterm = nn.Sequential(
+            nn.Linear(256, 64),
+            nn.ReLU(),
+            nn.Linear(64, 3)  # Up, Down, Sideways
+        )
+
+        # Value prediction with increased capacity
+        self.price_pred_value = nn.Sequential(
+            nn.Linear(256, 128),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(128, 4)  # % change for different timeframes
+        )
+
+        # Additional attention layer for feature refinement
+        self.attention = SelfAttention(256)
+
+    def _check_rebuild_network(self, features):
+        """
+        Adapt the network to a new flattened feature count.
+
+        Returns True only when the layers were actually rebuilt. A rebuild
+        re-initialises all weights, so any optimizer holding the old
+        parameters must be recreated by the caller.
+        """
+        if features == self.feature_dim:
+            return False
+
+        if self.conv_layers is not None:
+            # On the convolutional path no layer shape depends on feature_dim
+            # (the stack ends in global average pooling), so rebuilding would
+            # only throw away the current weights. Just record the new size.
+            self.feature_dim = features
+            return False
+
+        logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
+        self.feature_dim = features
+        self._build_network()
+        # Move to device after rebuilding
+        self.to(self.device)
+        return True
+
+    def forward(self, x):
+        """
+        Forward pass through the network.
+
+        Args:
+            x: [batch, features], or [batch, timeframes, features] (further
+                trailing axes are flattened into the last one on the conv path).
+
+        Returns:
+            Tuple ``(q_values, extrema_pred, price_predictions, features_refined)``
+            where ``price_predictions`` is a dict with the keys 'immediate',
+            'midterm', 'longterm' and 'values'.
+        """
+        batch_size = x.size(0)
+
+        # Process different input shapes
+        if x.dim() > 2:
+            if self.conv_layers is not None:
+                # Conv1d needs [batch, channels, length]; fold any extra
+                # trailing axes (e.g. height x width) into the length axis.
+                x_seq = x.flatten(start_dim=2)
+
+                # Keep feature_dim in sync with the data actually seen
+                self._check_rebuild_network(x_seq.size(1) * x_seq.size(2))
+
+                # Apply convolutions, then flatten [batch, channels, 1] -> [batch, channels]
+                x_conv = self.conv_layers(x_seq)
+                x_flat = x_conv.reshape(batch_size, -1)
+            else:
+                # If no conv layers, just flatten (reshape also accepts
+                # non-contiguous input)
+                x_flat = x.reshape(batch_size, -1)
+        else:
+            # For 2D input [batch, features]
+            x_flat = x
+            self._check_rebuild_network(x_flat.size(1))
+
+        # Apply FC layers
+        features = self.fc_layers(x_flat)
+
+        # Attention is applied over a sequence of length 1, so the attention
+        # weights are trivially 1 and the output is the value projection.
+        features_3d = features.unsqueeze(1)  # [batch, 1, features]
+        features_attended, _ = self.attention(features_3d)
+        features_refined = features_attended.squeeze(1)  # [batch, features]
+
+        # Calculate advantage and value
+        advantage = self.advantage_stream(features_refined)
+        value = self.value_stream(features_refined)
+
+        # Combine for Q-values (Dueling architecture)
+        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
+
+        # Get extrema predictions
+        extrema_pred = self.extrema_head(features_refined)
+
+        # Package price predictions
+        price_predictions = {
+            'immediate': self.price_pred_immediate(features_refined),
+            'midterm': self.price_pred_midterm(features_refined),
+            'longterm': self.price_pred_longterm(features_refined),
+            'values': self.price_pred_value(features_refined)
+        }
+
+        return q_values, extrema_pred, price_predictions, features_refined
+
+    def act(self, state, explore=True):
+        """
+        Choose action based on state with confidence thresholding.
+
+        Args:
+            state: a single (unbatched) observation; a batch axis is added here.
+            explore: currently unused; kept for interface compatibility.
+
+        Returns:
+            ``(action, confidence)``. ``confidence`` is always the softmax
+            probability of the greedy action, even when the returned action
+            was replaced by HOLD because that probability was too low.
+        """
+        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
+
+        with torch.no_grad():
+            q_values, _, _, _ = self(state_tensor)
+
+            # Apply softmax to get action probabilities
+            action_probs = F.softmax(q_values, dim=1)
+
+            # Get action with highest probability
+            action = action_probs.argmax(dim=1).item()
+            action_confidence = action_probs[0, action].item()
+
+            # Check if confidence exceeds threshold
+            if action_confidence < self.confidence_threshold:
+                # Log the rejected action before it is overwritten
+                logger.info(f"Action {action} confidence {action_confidence:.4f} below threshold {self.confidence_threshold}, forcing HOLD")
+                action = self.HOLD_ACTION
+
+        return action, action_confidence
+
+    def save(self, path):
+        """Save model weights and architecture to ``{path}.pt``"""
+        # A bare filename has an empty dirname, which os.makedirs rejects
+        directory = os.path.dirname(path)
+        if directory:
+            os.makedirs(directory, exist_ok=True)
+        torch.save({
+            'state_dict': self.state_dict(),
+            'input_shape': self.input_shape,
+            'n_actions': self.n_actions,
+            'feature_dim': self.feature_dim,
+            'confidence_threshold': self.confidence_threshold
+        }, f"{path}.pt")
+        logger.info(f"Enhanced CNN model saved to {path}.pt")
+
+    def load(self, path):
+        """
+        Load model weights and architecture from ``{path}.pt``.
+
+        Returns True on success. Any failure is logged and reported as False;
+        in that case the model may already have been partially updated.
+        """
+        try:
+            checkpoint = torch.load(f"{path}.pt", map_location=self.device)
+            self.input_shape = checkpoint['input_shape']
+            self.n_actions = checkpoint['n_actions']
+            # Re-derive channels from the saved shape so the rebuilt layers
+            # match the checkpoint rather than the shape given at construction.
+            self._set_dimensions(self.input_shape)
+            # The saved feature_dim wins: it may have been adapted at runtime
+            self.feature_dim = checkpoint['feature_dim']
+            if 'confidence_threshold' in checkpoint:
+                self.confidence_threshold = checkpoint['confidence_threshold']
+            self._build_network()
+            self.load_state_dict(checkpoint['state_dict'])
+            self.to(self.device)
+            logger.info(f"Enhanced CNN model loaded from {path}.pt")
+            return True
+        except Exception as e:
+            logger.error(f"Error loading model: {str(e)}")
+            return False
+
+# Additional utility for example sifting
+class ExampleSiftingDataset:
+    """
+    Reward-filtered store of transitions: low-reward examples are rejected on
+    insertion and the lowest-reward ones are evicted once the store is full
+    """
+    def __init__(self, max_examples=50000):
+        self.max_examples = max_examples
+        self.min_reward_threshold = -0.05  # Rewards must exceed this to be stored
+        self.examples = []
+        self.labels = []
+        self.rewards = []
+
+    def add_example(self, state, action, reward, next_state, done):
+        """Store the transition if its reward beats the current threshold"""
+        if not (reward > self.min_reward_threshold):
+            return
+
+        self.examples.append((state, action, reward, next_state, done))
+        self.rewards.append(reward)
+
+        if len(self.examples) <= self.max_examples:
+            return
+
+        # Over capacity: rank by reward (highest first) and drop the tail
+        keep = np.argsort(self.rewards)[::-1][:self.max_examples]
+        self.examples = [self.examples[i] for i in keep]
+        self.rewards = [self.rewards[i] for i in keep]
+
+        # From now on a new example has to beat the worst one still stored
+        self.min_reward_threshold = min(self.rewards)
+
+    def get_batch(self, batch_size):
+        """
+        Sample up to ``batch_size`` distinct examples, favouring higher rewards.
+
+        Returns None when the store is empty, otherwise a dict of numpy arrays
+        keyed 'states', 'actions', 'rewards', 'next_states' and 'dones'.
+        """
+        if not self.examples:
+            return None
+
+        # Shift rewards so every weight is positive, then normalise
+        reward_arr = np.array(self.rewards)
+        weights = reward_arr - min(reward_arr) + 0.1
+        probs = weights / weights.sum()
+
+        total = len(self.examples)
+        picked = np.random.choice(
+            total,
+            size=min(batch_size, total),
+            p=probs,
+            replace=False
+        )
+
+        # Transpose the chosen transitions into one column per field
+        columns = zip(*(self.examples[i] for i in picked))
+        keys = ('states', 'actions', 'rewards', 'next_states', 'dones')
+        return {key: np.array(column) for key, column in zip(keys, columns)}
+
+    def __len__(self):
+        return len(self.examples)
--- a/NN/models/saved/dqn_agent_best_metadata.json
+++ b/NN/models/saved/dqn_agent_best_metadata.json
@@ -0,0 +1 @@
+{"best_reward": 4791516.572471984, "best_episode": 3250, "best_pnl": 826842167451289.1, "best_win_rate": 0.47368421052631576, "date": "2025-04-01 10:19:16"}
--- a/NN/models/saved/hybrid_stats_latest.json
+++ b/NN/models/saved/hybrid_stats_latest.json
@@ -0,0 +1,20 @@
+{
+  "supervised": {
+    "epochs_completed": 22650,
+    "best_val_pnl": 0.0,
+    "best_epoch": 50,
+    "best_win_rate": 0
+  },
+  "reinforcement": {
+    "episodes_completed": 0,
+    "best_reward": -Infinity,
+    "best_episode": 0,
+    "best_win_rate": 0
+  },
+  "hybrid": {
+    "iterations_completed": 453,
+    "best_combined_score": 0.0,
+    "training_started": "2025-04-09T10:30:42.510856",
+    "last_update": "2025-04-09T10:40:02.217840"
+  }
+}
--- a/NN/models/saved/realtime_ticks_training_stats.json
+++ b/NN/models/saved/realtime_ticks_training_stats.json
@@ -0,0 +1,326 @@
+{
+  "epochs_completed": 8,
+  "best_val_pnl": 0.0,
+  "best_epoch": 1,
+  "best_win_rate": 0.0,
+  "training_started": "2025-04-02T10:43:58.946682",
+  "last_update": "2025-04-02T10:44:10.940892",
+  "epochs": [
+    {
+      "epoch": 1,
+      "train_loss": 1.0950355529785156,
+      "val_loss": 1.1657923062642415,
+      "train_acc": 0.3255208333333333,
+      "val_acc": 0.0,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:01.840889",
+      "data_age": 2,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    },
+    {
+      "epoch": 2,
+      "train_loss": 1.0831659038861592,
+      "val_loss": 1.1212460199991863,
+      "train_acc": 0.390625,
+      "val_acc": 0.0,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:03.134833",
+      "data_age": 4,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    },
+    {
+      "epoch": 3,
+      "train_loss": 1.0740693012873332,
+      "val_loss": 1.0992945830027263,
+      "train_acc": 0.4739583333333333,
+      "val_acc": 0.0,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:04.425272",
+      "data_age": 5,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    },
+    {
+      "epoch": 4,
+      "train_loss": 1.0747728943824768,
+      "val_loss": 1.0821794271469116,
+      "train_acc": 0.4609375,
+      "val_acc": 0.3229166666666667,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:05.716421",
+      "data_age": 6,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    },
+    {
+      "epoch": 5,
+      "train_loss": 1.0489931503931682,
+      "val_loss": 1.0669521888097127,
+      "train_acc": 0.5833333333333334,
+      "val_acc": 1.0,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:07.007935",
+      "data_age": 8,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    },
+    {
+      "epoch": 6,
+      "train_loss": 1.0533669590950012,
+      "val_loss": 1.0505590836207073,
+      "train_acc": 0.5104166666666666,
+      "val_acc": 1.0,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:08.296061",
+      "data_age": 9,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    },
+    {
+      "epoch": 7,
+      "train_loss": 1.0456886688868205,
+      "val_loss": 1.0351698795954387,
+      "train_acc": 0.5651041666666666,
+      "val_acc": 1.0,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:09.607584",
+      "data_age": 10,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    },
+    {
+      "epoch": 8,
+      "train_loss": 1.040040671825409,
+      "val_loss": 1.0227736632029216,
+      "train_acc": 0.6119791666666666,
+      "val_acc": 1.0,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 1.0,
+          "SELL": 0.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-04-02T10:44:10.940892",
+      "data_age": 11,
+      "cumulative_pnl": {
+        "train": 0.0,
+        "val": 0.0
+      },
+      "total_trades": {
+        "train": 0,
+        "val": 0
+      },
+      "overall_win_rate": {
+        "train": 0.0,
+        "val": 0.0
+      }
+    }
+  ],
+  "cumulative_pnl": {
+    "train": 0.0,
+    "val": 0.0
+  },
+  "total_trades": {
+    "train": 0,
+    "val": 0
+  },
+  "total_wins": {
+    "train": 0,
+    "val": 0
+  }
+}
--- a/NN/models/saved/realtime_training_stats.json
+++ b/NN/models/saved/realtime_training_stats.json
@@ -0,0 +1,192 @@
+{
+  "epochs_completed": 7,
+  "best_val_pnl": 0.002028853100759435,
+  "best_epoch": 6,
+  "best_win_rate": 0.5157894736842106,
+  "training_started": "2025-03-31T02:50:10.418670",
+  "last_update": "2025-03-31T02:50:15.227593",
+  "epochs": [
+    {
+      "epoch": 1,
+      "train_loss": 1.1206786036491394,
+      "val_loss": 1.0542699098587036,
+      "train_acc": 0.11197916666666667,
+      "val_acc": 0.25,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 0.0,
+          "SELL": 0.0,
+          "HOLD": 1.0
+        },
+        "val": {
+          "BUY": 0.0,
+          "SELL": 0.0,
+          "HOLD": 1.0
+        }
+      },
+      "timestamp": "2025-03-31T02:50:12.881423",
+      "data_age": 2
+    },
+    {
+      "epoch": 2,
+      "train_loss": 1.1266120672225952,
+      "val_loss": 1.072133183479309,
+      "train_acc": 0.1171875,
+      "val_acc": 0.25,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 0.0,
+          "SELL": 0.0,
+          "HOLD": 1.0
+        },
+        "val": {
+          "BUY": 0.0,
+          "SELL": 0.0,
+          "HOLD": 1.0
+        }
+      },
+      "timestamp": "2025-03-31T02:50:13.186840",
+      "data_age": 2
+    },
+    {
+      "epoch": 3,
+      "train_loss": 1.1415620843569438,
+      "val_loss": 1.1701548099517822,
+      "train_acc": 0.1015625,
+      "val_acc": 0.5208333333333334,
+      "train_pnl": 0.0,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.0,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 0.0,
+          "SELL": 0.0,
+          "HOLD": 1.0
+        },
+        "val": {
+          "BUY": 0.0,
+          "SELL": 0.0,
+          "HOLD": 1.0
+        }
+      },
+      "timestamp": "2025-03-31T02:50:13.442018",
+      "data_age": 3
+    },
+    {
+      "epoch": 4,
+      "train_loss": 1.1331567962964375,
+      "val_loss": 1.070081114768982,
+      "train_acc": 0.09375,
+      "val_acc": 0.22916666666666666,
+      "train_pnl": 0.010650217327384765,
+      "val_pnl": -0.0007049481907895126,
+      "train_win_rate": 0.49279538904899134,
+      "val_win_rate": 0.40625,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 0.0,
+          "SELL": 0.9036458333333334,
+          "HOLD": 0.09635416666666667
+        },
+        "val": {
+          "BUY": 0.0,
+          "SELL": 0.3333333333333333,
+          "HOLD": 0.6666666666666666
+        }
+      },
+      "timestamp": "2025-03-31T02:50:13.739899",
+      "data_age": 3
+    },
+    {
+      "epoch": 5,
+      "train_loss": 1.10965762535731,
+      "val_loss": 1.0485950708389282,
+      "train_acc": 0.12239583333333333,
+      "val_acc": 0.17708333333333334,
+      "train_pnl": 0.011924086862580204,
+      "val_pnl": 0.0,
+      "train_win_rate": 0.5070422535211268,
+      "val_win_rate": 0.0,
+      "best_position_size": 0.1,
+      "signal_distribution": {
+        "train": {
+          "BUY": 0.0,
+          "SELL": 0.7395833333333334,
+          "HOLD": 0.2604166666666667
+        },
+        "val": {
+          "BUY": 0.0,
+          "SELL": 0.0,
+          "HOLD": 1.0
+        }
+      },
+      "timestamp": "2025-03-31T02:50:14.073439",
+      "data_age": 3
+    },
+    {
+      "epoch": 6,
+      "train_loss": 1.1272419293721516,
+      "val_loss": 1.084235429763794,
+      "train_acc": 0.1015625,
+      "val_acc": 0.22916666666666666,
+      "train_pnl": 0.014825159601390072,
+      "val_pnl": 0.00405770620151887,
+      "train_win_rate": 0.4908616187989556,
+      "val_win_rate": 0.5157894736842106,
+      "best_position_size": 2.0,
+      "signal_distribution": {
+        "train": {
+          "BUY": 0.0,
+          "SELL": 1.0,
+          "HOLD": 0.0
+        },
+        "val": {
+          "BUY": 0.0,
+          "SELL": 1.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-03-31T02:50:14.658295",
+      "data_age": 4
+    },
+    {
+      "epoch": 7,
+      "train_loss": 1.1171108484268188,
+      "val_loss": 1.0741244554519653,
+      "train_acc": 0.1171875,
+      "val_acc": 0.22916666666666666,
+      "train_pnl": 0.0059474696523706605,
+      "val_pnl": 0.00405770620151887,
+      "train_win_rate": 0.4838709677419355,
+      "val_win_rate": 0.5157894736842106,
+      "best_position_size": 2.0,
+      "signal_distribution": {
+        "train": {
+          "BUY": 0.0,
+          "SELL": 0.7291666666666666,
+          "HOLD": 0.2708333333333333
+        },
+        "val": {
+          "BUY": 0.0,
+          "SELL": 1.0,
+          "HOLD": 0.0
+        }
+      },
+      "timestamp": "2025-03-31T02:50:15.227593",
+      "data_age": 4
+    }
+  ]
+}
--- a/NN/models/simple_cnn.py
+++ b/NN/models/simple_cnn.py
@@ -112,27 +112,33 @@ class SimpleCNN(nn.Module):
    def _build_network(self):
        """Build the neural network with current feature dimensions"""
        # Create a flexible architecture that adapts to input dimensions
+        # Increased complexity
        self.fc_layers = nn.Sequential(
-            nn.Linear(self.feature_dim, 256),
+            nn.Linear(self.feature_dim, 512), # Increased size
            nn.ReLU(),
-            nn.Linear(256, 256),
-            nn.ReLU()
+            nn.Dropout(0.2), # Added dropout
+            nn.Linear(512, 512),             # Increased size
+            nn.ReLU(),
+            nn.Dropout(0.2),             # Added dropout
+            nn.Linear(512, 512),             # Added layer
+            nn.ReLU(),
+            nn.Dropout(0.2)              # Added dropout
        )
        
        # Output heads (Dueling DQN architecture)
-        self.advantage_head = nn.Linear(256, self.n_actions)
-        self.value_head = nn.Linear(256, 1)
+        self.advantage_head = nn.Linear(512, self.n_actions) # Updated input size
+        self.value_head = nn.Linear(512, 1)                 # Updated input size
        
        # Extrema detection head
-        self.extrema_head = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
+        self.extrema_head = nn.Linear(512, 3)  # 0=bottom, 1=top, 2=neither, Updated input size
        
        # Price prediction heads for different timeframes
-        self.price_pred_immediate = nn.Linear(256, 3)  # Up, Down, Sideways for immediate term (1s, 1m)
-        self.price_pred_midterm = nn.Linear(256, 3)    # Up, Down, Sideways for mid-term (1h)
-        self.price_pred_longterm = nn.Linear(256, 3)   # Up, Down, Sideways for long-term (1d)
+        self.price_pred_immediate = nn.Linear(512, 3)  # Updated input size
+        self.price_pred_midterm = nn.Linear(512, 3)    # Updated input size
+        self.price_pred_longterm = nn.Linear(512, 3)   # Updated input size
        
        # Regression heads for exact price prediction
-        self.price_pred_value = nn.Linear(256, 4)  # Predicts % change for each timeframe (1s, 1m, 1h, 1d)
+        self.price_pred_value = nn.Linear(512, 4)  # Updated input size
    
    def _check_rebuild_network(self, features):
        """Check if network needs to be rebuilt for different feature dimensions"""
@@ -146,58 +152,70 @@ class SimpleCNN(nn.Module):
        return False
        
    def forward(self, x):
-        """
-        Forward pass through the network
-        Returns action values, extrema predictions, and price movement predictions for multiple timeframes
-        """
-        # Handle different input shapes
-        if len(x.shape) == 2:  # [batch_size, features]
-            # Simple feature vector
-            batch_size, features = x.shape
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(features)
-            
-        elif len(x.shape) == 3:  # [batch_size, timeframes/channels, features]
-            # Reshape to flatten timeframes/channels with features
-            batch_size, timeframes, features = x.shape
-            total_features = timeframes * features
-            
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(total_features)
-            
-            # Reshape tensor to [batch_size, total_features]
-            x = x.reshape(batch_size, total_features)
-            
-        # Apply fully connected layers
-        fc_out = self.fc_layers(x)
+        """Run the shared trunk and return the outputs of every head.
+        
+        Returns:
+            Tuple ``(action_values, extrema_pred, price_predictions, features)``;
+            ``price_predictions`` is a dict with the keys 'immediate',
+            'midterm', 'longterm' and 'values', and ``features`` is the output
+            of ``fc_layers`` that feeds every head.
+        """
        
-        # Dueling architecture
-        advantage = self.advantage_head(fc_out)
-        value = self.value_head(fc_out)
+        # Shared trunk: flatten, check the feature width, apply the FC stack
+        x = self._hidden_features(x)
        
-        # Q-values = value + (advantage - mean(advantage))
-        action_values = value + advantage - advantage.mean(dim=1, keepdim=True)
+        # Branch 1: Action values (Q-values) from the dueling value/advantage heads
+        action_values = self._dueling_q_values(x)
        
-        # Extrema predictions
-        extrema_pred = self.extrema_head(fc_out)
+        # Branch 2: Extrema detection (market top/bottom classification)
+        extrema_pred = self.extrema_head(x)
        
-        # Price movement predictions for different timeframes
-        price_immediate = self.price_pred_immediate(fc_out)  # 1s, 1m
-        price_midterm = self.price_pred_midterm(fc_out)      # 1h
-        price_longterm = self.price_pred_longterm(fc_out)    # 1d
+        # Branch 3: Price movement classification, one head per timeframe
+        price_immediate = self.price_pred_immediate(x)
+        price_midterm = self.price_pred_midterm(x)
+        price_longterm = self.price_pred_longterm(x)
        
-        # Regression values for exact price predictions (percentage changes)
-        price_values = self.price_pred_value(fc_out)
+        # Branch 4: Value prediction (regression for expected price changes)
+        price_values = self.price_pred_value(x)
        
-        # Return all predictions in a structured dictionary
+        # Package price predictions
        price_predictions = {
-            'immediate': price_immediate,
-            'midterm': price_midterm,
-            'longterm': price_longterm,
-            'values': price_values
+            'immediate': price_immediate,  # Classification (up/down/sideways)
+            'midterm': price_midterm,      # Classification (up/down/sideways)
+            'longterm': price_longterm,    # Classification (up/down/sideways)
+            'values': price_values         # Regression (expected % change)
        }
        
-        return action_values, extrema_pred, price_predictions
+        # Return all outputs and the hidden feature representation
+        return action_values, extrema_pred, price_predictions, x
+    
+    def extract_features(self, x):
+        """Return the dueling Q-values together with the hidden features.
+        
+        Runs the same trunk and Q-value computation as ``forward`` but skips
+        the extrema and price-prediction heads.
+        """
+        x_features = self._hidden_features(x)
+        return self._dueling_q_values(x_features), x_features
+    
+    def _hidden_features(self, x):
+        """Flatten ``x`` to [batch, features] and apply the shared FC layers."""
+        # Collapse every dimension after the batch axis into one feature axis
+        if x.dim() > 2:
+            x = x.reshape(x.size(0), -1)
+        
+        # Let _check_rebuild_network react to an unexpected feature width
+        if x.size(1) != self.feature_dim:
+            self._check_rebuild_network(x.size(1))
+        
+        return self.fc_layers(x)
+    
+    def _dueling_q_values(self, features):
+        """Combine the value and advantage heads into dueling Q-values."""
+        advantage = self.advantage_head(features)
+        value = self.value_head(features)
+        # Q-values = value + (advantage - mean(advantage))
+        return value + advantage - advantage.mean(dim=1, keepdim=True)
        
    def save(self, path):
        """Save model weights and architecture"""
@@ -241,8 +259,10 @@ class CNNModelPyTorch(nn.Module):
        self.output_size = output_size
        self.timeframes = timeframes
        
-        # Calculate total input features across all timeframes
-        self.total_features = num_features * len(timeframes)
+        # num_features should already be the total features across all timeframes
+        self.total_features = num_features
+        logger.info(f"CNNModelPyTorch initialized with window_size={window_size}, num_features={num_features}, "
+                   f"total_features={self.total_features}, output_size={output_size}, timeframes={timeframes}")
        
        # Device configuration
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -317,6 +337,10 @@ class CNNModelPyTorch(nn.Module):
        # Ensure input is on the correct device
        x = x.to(self.device)
        
+        # Log input tensor shape for debugging
+        input_shape = x.size()
+        logger.debug(f"Input tensor shape: {input_shape}")
+        
        # Check input dimensions and reshape as needed
        if len(x.size()) == 2:
            # If input is [batch_size, features], reshape to [batch_size, features, 1]
@@ -324,8 +348,17 @@ class CNNModelPyTorch(nn.Module):
            
            # Check and handle if input features don't match model expectations
            if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=1)
+                else:
+                    x = x[:, :self.total_features]
            
            # For 1D input, use a sequence length of 1
            seq_len = 1
@@ -336,14 +369,26 @@ class CNNModelPyTorch(nn.Module):
            
            # Check and handle if input dimensions don't match model expectations
            if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, seq_len, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=2)
+                else:
+                    x = x[:, :, :self.total_features]
            
            # Reshape input: [batch, window_size, features] -> [batch, features, window_size]
            x = x.permute(0, 2, 1)
        else:
            raise ValueError(f"Unexpected input shape: {x.size()}, expected 2D or 3D tensor")
        
+        # Log reshaped tensor for debugging
+        logger.debug(f"Reshaped tensor for convolution: {x.size()}")
+        
        # Convolutional layers with dropout - safely handle small spatial dimensions
        try:
            x = self.dropout1(F.relu(self.norm1(self.conv1(x))))
				`@@ -0,0 +1 @@`
				`{"best_reward": 4791516.572471984, "best_episode": 3250, "best_pnl": 826842167451289.1, "best_win_rate": 0.47368421052631576, "date": "2025-04-01 10:19:16"}`