diff --git a/NN/models/dqn_agent.py b/NN/models/dqn_agent.py
index ac5a3d2..8b019e2 100644
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -657,6 +657,24 @@ class DQNAgent:
             done: Whether episode is done
             is_extrema: Whether this is a local extrema sample (for specialized learning)
         """
+        # Validate states before storing experience
+        if state is None or next_state is None:
+            logger.debug("Skipping experience storage: None state provided")
+            return
+
+        if isinstance(state, dict) and not state:
+            logger.debug("Skipping experience storage: empty state dictionary")
+            return
+
+        if isinstance(next_state, dict) and not next_state:
+            logger.debug("Skipping experience storage: empty next_state dictionary")
+            return
+
+        # Check if states are all zeros (invalid)
+        if hasattr(state, '__iter__') and all(f == 0 for f in np.array(state).flatten()):
+            logger.debug("Skipping experience storage: state is all zeros")
+            return
+
         experience = (state, action, reward, next_state, done)

         # Always add to main memory
@@ -761,6 +779,15 @@ class DQNAgent:
             int: Action (0=BUY, 1=SELL)
         """
         try:
+            # Validate state first - return early if empty/invalid/None
+            if state is None:
+                logger.warning("None state provided to act(), returning SELL action")
+                return 1  # SELL action (safe default)
+
+            if isinstance(state, dict) and not state:
+                logger.warning("Empty state dictionary provided to act(), returning SELL action")
+                return 1  # SELL action (safe default)
+
             # Use the DQNNetwork's act method for consistent behavior
             action_idx, confidence, action_probs = self.policy_net.act(state, explore=explore)

@@ -790,6 +817,14 @@
     def act_with_confidence(self, state: np.ndarray, market_regime: str = 'trending') -> Tuple[int, float, List[float]]:
         """Choose action with confidence score adapted to market regime"""
         try:
+            # Validate state first - return early if empty/invalid/None
+            if state is None:
+                logger.warning("None state provided to act_with_confidence(), returning safe defaults")
+                return 1, 0.1, [0.0, 0.9, 0.1]  # SELL action with low confidence
+
+            if isinstance(state, dict) and not state:
+                logger.warning("Empty state dictionary provided to act_with_confidence(), returning safe defaults")
+                return 1, 0.0, [0.0, 1.0, 0.0]  # SELL action with zero confidence
+
             # Convert state to tensor if needed
             if isinstance(state, np.ndarray):
                 state_tensor = torch.FloatTensor(state)
diff --git a/core/orchestrator.py b/core/orchestrator.py
index 18467eb..04a4f6f 100644
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -368,6 +368,10 @@ class TradingOrchestrator:
             {}
         )  # {symbol: {action, timestamp, confidence}}

+        # Decision fusion overconfidence tracking
+        self.decision_fusion_overconfidence_count = 0
+        self.max_overconfidence_threshold = 3  # Disable after 3 overconfidence detections
+
        # Signal accumulation for trend confirmation
        self.signal_accumulator: Dict[str, List[Dict]] = (
            {}
@@ -1385,6 +1389,30 @@ class TradingOrchestrator:
         """Check if model training is enabled"""
         return self.model_toggle_states.get(model_name, {}).get("training_enabled", True)

+    def disable_decision_fusion_temporarily(self, reason: str = "overconfidence detected"):
+        """Temporarily disable decision fusion model due to issues"""
+        logger.warning(f"Disabling decision fusion model: {reason}")
+        self.set_model_toggle_state("decision_fusion", inference_enabled=False, training_enabled=False)
+        logger.info("Decision fusion model disabled. Will use programmatic decision combination.")
+
+    def enable_decision_fusion(self):
+        """Re-enable decision fusion model"""
+        logger.info("Re-enabling decision fusion model")
+        self.set_model_toggle_state("decision_fusion", inference_enabled=True, training_enabled=True)
+        self.decision_fusion_overconfidence_count = 0  # Reset overconfidence counter
+
+    def get_decision_fusion_status(self) -> Dict[str, Any]:
+        """Get current decision fusion model status"""
+        return {
+            "enabled": self.decision_fusion_enabled,
+            "mode": self.decision_fusion_mode,
+            "inference_enabled": self.is_model_inference_enabled("decision_fusion"),
+            "training_enabled": self.is_model_training_enabled("decision_fusion"),
+            "network_available": self.decision_fusion_network is not None,
+            "overconfidence_count": self.decision_fusion_overconfidence_count,
+            "max_overconfidence_threshold": self.max_overconfidence_threshold
+        }
+
     async def start_continuous_trading(self, symbols: Optional[List[str]] = None):
         """Start the continuous trading loop, using a decision model and trading executor"""
         if symbols is None:
@@ -1958,6 +1986,14 @@ class TradingOrchestrator:
                    self._store_decision_fusion_inference(
                        decision, predictions, current_price
                    )
+
+                    # Train fusion model in programmatic mode at regular intervals
+                    self.decision_fusion_decisions_count += 1
+                    if (self.decision_fusion_decisions_count % self.decision_fusion_training_interval == 0 and
+                        len(self.decision_fusion_training_data) >= self.decision_fusion_min_samples):
+
+                        logger.info(f"Training decision fusion model in programmatic mode (decision #{self.decision_fusion_decisions_count})")
+                        asyncio.create_task(self._train_decision_fusion_programmatic())

                # Update state
                self.last_decision_time[symbol] = current_time
@@ -3976,7 +4012,7 @@ class TradingOrchestrator:
         """Train COB RL model"""
         try:
             # COB RL models might have specific training methods
-            if hasattr(model, "add_experience"):
+            if hasattr(model, "remember"):
                 action_names = ["SELL", "HOLD", "BUY"]
                 action_idx = action_names.index(prediction["action"])

@@ -3988,7 +4024,7 @@ class TradingOrchestrator:
                )
                return False

-            model.add_experience(
+            model.remember(
                state=state,
                action=action_idx,
                reward=reward,
@@ -4569,11 +4605,30 @@ class TradingOrchestrator:
             if base_data is None:
                 base_data = self.data_provider.build_base_data_input(symbol)
                 if not base_data:
-                    logger.warning(f"Cannot build BaseDataInput for RL state: {symbol}")
+                    logger.debug(f"Cannot build BaseDataInput for RL state: {symbol}")
                     return None

+            # Validate base_data has the required method
+            if not hasattr(base_data, 'get_feature_vector'):
+                logger.debug(f"BaseDataInput for {symbol} missing get_feature_vector method")
+                return None
+
             # Get unified feature vector (7850 features including all timeframes and COB data)
             feature_vector = base_data.get_feature_vector()
+
+            # Validate feature vector
+            if feature_vector is None or len(feature_vector) == 0:
+                logger.debug(f"Empty feature vector for RL state: {symbol}")
+                return None
+
+            # Check if all features are zero (invalid state)
+            if all(f == 0 for f in feature_vector):
+                logger.debug(f"All features are zero for RL state: {symbol}")
+                return None
+
+            # Convert to numpy array if needed
+            if not isinstance(feature_vector, np.ndarray):
+                feature_vector = np.array(feature_vector, dtype=np.float32)

             # Return the full unified feature vector for RL agent
             # The DQN agent is now initialized with the correct size to match this
@@ -5152,6 +5207,55 @@ class TradingOrchestrator:
                 logger.warning(f"Decision fusion initialization failed: {e}")
{e}") self.decision_fusion_enabled = False + async def _train_decision_fusion_programmatic(self): + """Train decision fusion model in programmatic mode""" + try: + if not self.decision_fusion_network or len(self.decision_fusion_training_data) < self.decision_fusion_min_samples: + return + + logger.info(f"Training decision fusion model with {len(self.decision_fusion_training_data)} samples") + + # Prepare training data + inputs = [] + targets = [] + + for sample in self.decision_fusion_training_data[-100:]: # Use last 100 samples + if 'input_features' in sample and 'outcome' in sample: + inputs.append(sample['input_features']) + # Convert outcome to target (1.0 for correct, 0.0 for incorrect) + target = 1.0 if sample['outcome']['correct'] else 0.0 + targets.append(target) + + if len(inputs) < 10: # Need minimum samples + return + + # Convert to tensors + inputs_tensor = torch.tensor(inputs, dtype=torch.float32, device=self.device) + targets_tensor = torch.tensor(targets, dtype=torch.float32, device=self.device) + + # Training step + self.decision_fusion_network.train() + optimizer = torch.optim.Adam(self.decision_fusion_network.parameters(), lr=0.001) + + optimizer.zero_grad() + outputs = self.decision_fusion_network(inputs_tensor) + loss = torch.nn.MSELoss()(outputs.squeeze(), targets_tensor) + loss.backward() + optimizer.step() + + # Update statistics + current_loss = loss.item() + self.update_model_loss("decision_fusion", current_loss) + + logger.info(f"Decision fusion training completed: loss={current_loss:.4f}, samples={len(inputs)}") + + # Save checkpoint periodically + if self.decision_fusion_decisions_count % (self.decision_fusion_training_interval * 5) == 0: + self._save_decision_fusion_checkpoint() + + except Exception as e: + logger.error(f"Error training decision fusion in programmatic mode: {e}") + def _create_decision_fusion_input( self, symbol: str, @@ -5293,17 +5397,44 @@ class TradingOrchestrator: symbol, predictions, current_price, timestamp ) + # DEBUG: Log decision fusion input features + logger.info(f"=== DECISION FUSION INPUT FEATURES ===") + logger.info(f" Input shape: {input_features.shape}") + logger.info(f" Input features (first 20): {input_features[0, :20].cpu().numpy()}") + logger.info(f" Input features (last 20): {input_features[0, -20:].cpu().numpy()}") + logger.info(f" Input features mean: {input_features.mean().item():.4f}") + logger.info(f" Input features std: {input_features.std().item():.4f}") + # Get decision fusion network prediction with torch.no_grad(): output = self.decision_fusion_network(input_features) probabilities = output.squeeze().cpu().numpy() + # DEBUG: Log decision fusion outputs + logger.info(f"=== DECISION FUSION OUTPUTS ===") + logger.info(f" Raw output shape: {output.shape}") + logger.info(f" Probabilities: BUY={probabilities[0]:.4f}, SELL={probabilities[1]:.4f}, HOLD={probabilities[2]:.4f}") + logger.info(f" Probability sum: {probabilities.sum():.4f}") + # Convert probabilities to action and confidence action_idx = np.argmax(probabilities) actions = ["BUY", "SELL", "HOLD"] best_action = actions[action_idx] best_confidence = float(probabilities[action_idx]) + # DEBUG: Check for overconfidence + if best_confidence > 0.95: + self.decision_fusion_overconfidence_count += 1 + logger.warning(f"DECISION FUSION OVERCONFIDENCE DETECTED: {best_confidence:.3f} for {best_action} (count: {self.decision_fusion_overconfidence_count})") + + if self.decision_fusion_overconfidence_count >= self.max_overconfidence_threshold: + logger.error(f"Decision 
+                    self.disable_decision_fusion_temporarily("overconfidence threshold exceeded")
+                    # Fallback to programmatic method
+                    return self._combine_predictions(
+                        symbol, current_price, predictions, timestamp
+                    )
+
             # Get current position P&L
             current_position_pnl = self._get_current_position_pnl(symbol, current_price)
@@ -6402,31 +6533,44 @@ class TradingOrchestrator:
             models_trained = []

             # Train DQN agent if available and enabled
-            if self.rl_agent and hasattr(self.rl_agent, "add_experience") and self.is_model_training_enabled("dqn"):
+            if self.rl_agent and hasattr(self.rl_agent, "remember") and self.is_model_training_enabled("dqn"):
                 try:
-                    # Create state representation from base_data (same as CNN model)
-                    state = self._create_state_from_base_data(symbol, base_data)
+                    # Validate base_data before creating state
+                    if not base_data or not hasattr(base_data, 'get_feature_vector'):
+                        logger.debug(f"⚠️ Skipping DQN training for {symbol}: no valid base_data")
+                    else:
+                        # Check if base_data has actual features
+                        features = base_data.get_feature_vector()
+                        if not features or len(features) == 0 or all(f == 0 for f in features):
+                            logger.debug(f"⚠️ Skipping DQN training for {symbol}: no valid features in base_data")
+                        else:
+                            # Create state representation from base_data (same as CNN model)
+                            state = self._create_state_from_base_data(symbol, base_data)
+
+                            # Skip training if no valid state could be created
+                            if state is None:
+                                logger.debug(f"⚠️ Skipping DQN training for {symbol}: could not create valid state")
+                            else:
+                                # Map action to DQN action space - CONSISTENT ACTION MAPPING
+                                action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
+                                dqn_action = action_mapping.get(action, 2)

-                    # Map action to DQN action space - CONSISTENT ACTION MAPPING
-                    action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
-                    dqn_action = action_mapping.get(action, 2)
+                                # Calculate immediate reward based on confidence and execution
+                                immediate_reward = confidence if action != "HOLD" else 0.0

-                    # Calculate immediate reward based on confidence and execution
-                    immediate_reward = confidence if action != "HOLD" else 0.0
+                                # Add experience to DQN
+                                self.rl_agent.remember(
+                                    state=state,
+                                    action=dqn_action,
+                                    reward=immediate_reward,
+                                    next_state=state,  # Will be updated with actual outcome later
+                                    done=False,
+                                )

-                    # Add experience to DQN
-                    self.rl_agent.add_experience(
-                        state=state,
-                        action=dqn_action,
-                        reward=immediate_reward,
-                        next_state=state,  # Will be updated with actual outcome later
-                        done=False,
-                    )
-
-                    models_trained.append("dqn")
-                    logger.debug(
-                        f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
-                    )
+                                models_trained.append("dqn")
+                                logger.debug(
+                                    f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})"
+                                )
             except Exception as e:
                 logger.debug(f"Error training DQN on decision: {e}")

@@ -6462,18 +6606,19 @@ class TradingOrchestrator:
            if self.cob_rl_agent and symbol in self.latest_cob_data and self.is_model_training_enabled("cob_rl"):
                try:
                    cob_data = self.latest_cob_data[symbol]
-                    if hasattr(self.cob_rl_agent, "add_experience"):
+                    if hasattr(self.cob_rl_agent, "remember"):
                        # Create COB state representation
                        cob_state = self._create_cob_state_for_training(
                            symbol, cob_data
                        )

                        # Add COB experience
-                        self.cob_rl_agent.add_experience(
+                        self.cob_rl_agent.remember(
                            state=cob_state,
                            action=action,
                            reward=confidence,
-                            symbol=symbol,
+                            next_state=cob_state,  # Add required next_state parameter
+                            done=False,  # Add required done parameter
                        )

                        models_trained.append("cob_rl")
@@ -6638,20 +6783,25 @@ class TradingOrchestrator:
             logger.error(f"Error getting market data for training {symbol}: {e}")
             return None

-    def _create_state_from_base_data(self, symbol: str, base_data: Any) -> np.ndarray:
+    def _create_state_from_base_data(self, symbol: str, base_data: Any) -> Optional[np.ndarray]:
         """Create state representation for DQN training from base_data (same as CNN model)"""
         try:
             # Validate base_data
             if not base_data or not hasattr(base_data, 'get_feature_vector'):
-                logger.warning(f"Invalid base_data for {symbol}: {type(base_data)}")
-                return np.zeros(403)  # Default state size for DQN
+                logger.debug(f"Invalid base_data for {symbol}: {type(base_data)}")
+                return None

             # Get feature vector from base_data (same as CNN model)
             features = base_data.get_feature_vector()

             if not features or len(features) == 0:
-                logger.warning(f"No features available from base_data for {symbol}, using default state")
-                return np.zeros(403)  # Default state size for DQN
+                logger.debug(f"No features available from base_data for {symbol}")
+                return None
+
+            # Check if all features are zero (invalid state)
+            if all(f == 0 for f in features):
+                logger.debug(f"All features are zero for {symbol}")
+                return None

             # Convert to numpy array
             state = np.array(features, dtype=np.float32)
@@ -6671,7 +6821,7 @@ class TradingOrchestrator:

         except Exception as e:
             logger.error(f"Error creating state from base_data for {symbol}: {e}")
-            return np.zeros(403)  # Default state size for DQN
+            return None
diff --git a/data/ui_state.json b/data/ui_state.json
new file mode 100644
index 0000000..55c7121
--- /dev/null
+++ b/data/ui_state.json
@@ -0,0 +1,25 @@
+{
+  "model_toggle_states": {
+    "dqn": {
+      "inference_enabled": false,
+      "training_enabled": true
+    },
+    "cnn": {
+      "inference_enabled": true,
+      "training_enabled": true
+    },
+    "cob_rl": {
+      "inference_enabled": true,
+      "training_enabled": true
+    },
+    "decision_fusion": {
+      "inference_enabled": false,
+      "training_enabled": true
+    },
+    "transformer": {
+      "inference_enabled": true,
+      "training_enabled": true
+    }
+  },
+  "timestamp": "2025-07-29T09:07:36.747677"
+}
\ No newline at end of file