fix sim and wip fix live

This commit is contained in:
Dobromir Popov
2025-07-08 02:47:10 +03:00
parent 64678bd8d3
commit 6c91bf0b93
5 changed files with 220 additions and 106 deletions

View File

@ -1884,7 +1884,10 @@ class EnhancedRealtimeTrainingSystem:
if (self.orchestrator and hasattr(self.orchestrator, 'rl_agent')
and self.orchestrator.rl_agent):
# Get Q-values from model
# Use RL agent to make prediction
current_state = self._get_dqn_state(symbol)
if current_state is None:
return
action = self.orchestrator.rl_agent.act(current_state, explore=False)
# Get Q-values separately if available
if hasattr(self.orchestrator.rl_agent, 'policy_net'):
@ -1893,13 +1896,11 @@ class EnhancedRealtimeTrainingSystem:
q_values_tensor = self.orchestrator.rl_agent.policy_net(state_tensor)
if isinstance(q_values_tensor, tuple):
q_values = q_values_tensor[0].cpu().numpy()[0].tolist()
else:
q_values = q_values_tensor.cpu().numpy()[0].tolist()
else:
q_values = [0.33, 0.33, 0.34] # Default uniform distribution
confidence = max(q_values) / sum(q_values) if sum(q_values) > 0 else 0.33
else:
# Fallback to technical analysis-based prediction
action, q_values, confidence = self._technical_analysis_prediction(symbol)