training fixes and enhancements wip

This commit is contained in:
Dobromir Popov
2025-07-14 10:00:42 +03:00
parent e76b1b16dc
commit e74f1393c4
6 changed files with 378 additions and 99 deletions

View File

@ -1060,8 +1060,8 @@ class EnhancedRealtimeTrainingSystem:
total_loss += loss
training_iterations += 1
elif hasattr(rl_agent, 'replay'):
# Fallback to replay method
loss = rl_agent.replay(batch_size=len(batch))
# Fallback to replay method - DQNAgent.replay() doesn't accept batch_size parameter
loss = rl_agent.replay()
if loss is not None:
total_loss += loss
training_iterations += 1
@ -1129,25 +1129,10 @@ class EnhancedRealtimeTrainingSystem:
state = combined_features # 2000-dimensional state
# Store experience in COB RL agent
if hasattr(cob_rl_agent, 'store_experience'):
experience = {
'state': state,
'action': action,
'reward': reward,
'next_state': state, # Will be updated with next observation
'done': False,
'symbol': symbol,
'timestamp': datetime.now(),
'price': current_price,
'cob_features': {
'raw_tick_available': raw_tick_matrix is not None,
'aggregated_available': aggregated_matrix is not None,
'imbalance': combined_features[0] if len(combined_features) > 0 else 0,
'spread': combined_features[1] if len(combined_features) > 1 else 0,
'liquidity': combined_features[4] if len(combined_features) > 4 else 0
}
}
cob_rl_agent.store_experience(experience)
if hasattr(cob_rl_agent, 'remember'):
# Use tuple format for DQN agent compatibility
experience_tuple = (state, action, reward, state, False) # next_state = current state for now
cob_rl_agent.remember(state, action, reward, state, False)
training_updates += 1
# Perform COB RL training if enough experiences