Fix sim; WIP fix for live
@@ -1884,7 +1884,10 @@ class EnhancedRealtimeTrainingSystem:
            if (self.orchestrator and hasattr(self.orchestrator, 'rl_agent')
                    and self.orchestrator.rl_agent):

                # Get Q-values from model
                # Use RL agent to make prediction
                current_state = self._get_dqn_state(symbol)
                if current_state is None:
                    return
                action = self.orchestrator.rl_agent.act(current_state, explore=False)

                # Get Q-values separately if available
                if hasattr(self.orchestrator.rl_agent, 'policy_net'):
@@ -1893,13 +1896,11 @@ class EnhancedRealtimeTrainingSystem:
                    q_values_tensor = self.orchestrator.rl_agent.policy_net(state_tensor)
                    if isinstance(q_values_tensor, tuple):
                        q_values = q_values_tensor[0].cpu().numpy()[0].tolist()
                    else:
                        q_values = q_values_tensor.cpu().numpy()[0].tolist()
                else:
                    q_values = [0.33, 0.33, 0.34]  # Default uniform distribution

                confidence = max(q_values) / sum(q_values) if sum(q_values) > 0 else 0.33

            else:
                # Fallback to technical analysis-based prediction
                action, q_values, confidence = self._technical_analysis_prediction(symbol)
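For reference, the prediction path in this hunk can be read as the self-contained sketch below. It is an illustration only, not the repository code: the line that builds `state_tensor` falls in the omitted context between the two hunks, and the `device` and `policy_net` attributes on the agent are assumptions here.

    # Hedged sketch of the DQN prediction path shown above.
    # Assumptions (not confirmed by this commit): the agent exposes a PyTorch
    # module as `policy_net`, a `device` attribute, and the state is a flat
    # numpy array. The `state_tensor` construction is omitted in the diff.
    import numpy as np
    import torch

    def dqn_predict(agent, state: np.ndarray):
        """Return (action, q_values, confidence) from a DQN-style agent."""
        # Build a batch of one and move it to the agent's device (assumed attribute)
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(agent.device)
        with torch.no_grad():
            output = agent.policy_net(state_tensor)
        # Some networks return (q_values, aux); keep only the Q-value head
        q_tensor = output[0] if isinstance(output, tuple) else output
        q_values = q_tensor.cpu().numpy()[0].tolist()
        action = int(np.argmax(q_values))
        # Mirror the confidence heuristic used in the hunk above
        total = sum(q_values)
        confidence = max(q_values) / total if total > 0 else 0.33
        return action, q_values, confidence

With an agent shaped like that, `dqn_predict(self.orchestrator.rl_agent, current_state)` would yield the same (action, q_values, confidence) triple the diff assigns, with the technical-analysis fallback covering the case where no RL agent is attached.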