training fixes and enhancements wip

2025-07-14 10:00:42 +03:00
parent e76b1b16dc
commit e74f1393c4
6 changed files with 378 additions and 99 deletions
--- a/enhanced_realtime_training.py
+++ b/enhanced_realtime_training.py
@@ -1060,8 +1060,8 @@ class EnhancedRealtimeTrainingSystem:
                        total_loss += loss
                        training_iterations += 1
                elif hasattr(rl_agent, 'replay'):
-                    # Fallback to replay method
-                    loss = rl_agent.replay(batch_size=len(batch))
+                    # Fallback to replay method - DQNAgent.replay() doesn't accept batch_size parameter
+                    loss = rl_agent.replay()
                    if loss is not None:
                        total_loss += loss
                        training_iterations += 1
@@ -1129,25 +1129,10 @@ class EnhancedRealtimeTrainingSystem:
                            state = combined_features  # 2000-dimensional state
                            
                            # Store experience in COB RL agent
-                            if hasattr(cob_rl_agent, 'store_experience'):
-                                experience = {
-                                    'state': state,
-                                    'action': action,
-                                    'reward': reward,
-                                    'next_state': state,  # Will be updated with next observation
-                                    'done': False,
-                                    'symbol': symbol,
-                                    'timestamp': datetime.now(),
-                                    'price': current_price,
-                                    'cob_features': {
-                                        'raw_tick_available': raw_tick_matrix is not None,
-                                        'aggregated_available': aggregated_matrix is not None,
-                                        'imbalance': combined_features[0] if len(combined_features) > 0 else 0,
-                                        'spread': combined_features[1] if len(combined_features) > 1 else 0,
-                                        'liquidity': combined_features[4] if len(combined_features) > 4 else 0
-                                    }
-                                }
-                                cob_rl_agent.store_experience(experience)
+                            if hasattr(cob_rl_agent, 'remember'):
+                                # Use tuple format for DQN agent compatibility
+                                experience_tuple = (state, action, reward, state, False)  # next_state = current state for now
+                                cob_rl_agent.remember(state, action, reward, state, False)
                                training_updates += 1
                            
                            # Perform COB RL training if enough experiences