Fix sim; WIP fix for live
@@ -1884,7 +1884,10 @@ class EnhancedRealtimeTrainingSystem:
            if (self.orchestrator and hasattr(self.orchestrator, 'rl_agent')
                    and self.orchestrator.rl_agent):

                # Get Q-values from model
                # Use RL agent to make prediction
                current_state = self._get_dqn_state(symbol)
                if current_state is None:
                    return
                action = self.orchestrator.rl_agent.act(current_state, explore=False)

                # Get Q-values separately if available
                if hasattr(self.orchestrator.rl_agent, 'policy_net'):
@@ -1893,13 +1896,11 @@ class EnhancedRealtimeTrainingSystem:
                    q_values_tensor = self.orchestrator.rl_agent.policy_net(state_tensor)
                    if isinstance(q_values_tensor, tuple):
                        q_values = q_values_tensor[0].cpu().numpy()[0].tolist()
                    else:
                        q_values = q_values_tensor.cpu().numpy()[0].tolist()
                else:
                    q_values = [0.33, 0.33, 0.34]  # Default uniform distribution

                confidence = max(q_values) / sum(q_values) if sum(q_values) > 0 else 0.33

            else:
                # Fallback to technical analysis-based prediction
                action, q_values, confidence = self._technical_analysis_prediction(symbol)
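For reference, the prediction path in this hunk can be read as the self-contained sketch below. It is an illustration only, not the repository code: the line that builds `state_tensor` falls in the omitted context between the two hunks, and the `device` and `policy_net` attributes on the agent are assumptions here.

    # Hedged sketch of the DQN prediction path shown above.
    # Assumptions (not confirmed by this commit): the agent exposes a PyTorch
    # module as `policy_net`, a `device` attribute, and the state is a flat
    # numpy array. The `state_tensor` construction is omitted in the diff.
    import numpy as np
    import torch

    def dqn_predict(agent, state: np.ndarray):
        """Return (action, q_values, confidence) from a DQN-style agent."""
        # Build a batch of one and move it to the agent's device (assumed attribute)
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(agent.device)
        with torch.no_grad():
            output = agent.policy_net(state_tensor)
        # Some networks return (q_values, aux); keep only the Q-value head
        q_tensor = output[0] if isinstance(output, tuple) else output
        q_values = q_tensor.cpu().numpy()[0].tolist()
        action = int(np.argmax(q_values))
        # Mirror the confidence heuristic used in the hunk above
        total = sum(q_values)
        confidence = max(q_values) / total if total > 0 else 0.33
        return action, q_values, confidence

With an agent shaped like that, `dqn_predict(self.orchestrator.rl_agent, current_state)` would yield the same (action, q_values, confidence) triple the diff assigns, with the technical-analysis fallback covering the case where no RL agent is attached.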