training fixes and enhancements wip
This commit is contained in:
@@ -1060,8 +1060,8 @@ class EnhancedRealtimeTrainingSystem:
|
||||
total_loss += loss
|
||||
training_iterations += 1
|
||||
elif hasattr(rl_agent, 'replay'):
|
||||
# Fallback to replay method
|
||||
loss = rl_agent.replay(batch_size=len(batch))
|
||||
# Fallback to replay method - DQNAgent.replay() doesn't accept batch_size parameter
|
||||
loss = rl_agent.replay()
|
||||
if loss is not None:
|
||||
total_loss += loss
|
||||
training_iterations += 1
|
||||
@@ -1129,25 +1129,10 @@ class EnhancedRealtimeTrainingSystem:
|
||||
state = combined_features # 2000-dimensional state
|
||||
|
||||
# Store experience in COB RL agent
|
||||
if hasattr(cob_rl_agent, 'store_experience'):
|
||||
experience = {
|
||||
'state': state,
|
||||
'action': action,
|
||||
'reward': reward,
|
||||
'next_state': state, # Will be updated with next observation
|
||||
'done': False,
|
||||
'symbol': symbol,
|
||||
'timestamp': datetime.now(),
|
||||
'price': current_price,
|
||||
'cob_features': {
|
||||
'raw_tick_available': raw_tick_matrix is not None,
|
||||
'aggregated_available': aggregated_matrix is not None,
|
||||
'imbalance': combined_features[0] if len(combined_features) > 0 else 0,
|
||||
'spread': combined_features[1] if len(combined_features) > 1 else 0,
|
||||
'liquidity': combined_features[4] if len(combined_features) > 4 else 0
|
||||
}
|
||||
}
|
||||
cob_rl_agent.store_experience(experience)
|
||||
if hasattr(cob_rl_agent, 'remember'):
|
||||
# Use tuple format for DQN agent compatibility
|
||||
experience_tuple = (state, action, reward, state, False) # next_state = current state for now
|
||||
cob_rl_agent.remember(state, action, reward, state, False)
|
||||
training_updates += 1
|
||||
|
||||
# Perform COB RL training if enough experiences
|
||||
|
Reference in New Issue
Block a user