stability fixes, lower updates

Dobromir Popov
2025-07-26 22:32:45 +03:00
parent 9576c52039
commit 7c61c12b70
9 changed files with 1210 additions and 45 deletions


@@ -16,11 +16,17 @@ matplotlib.use('Agg') # Use non-interactive Agg backend
 import asyncio
 import logging
 import sys
+import platform
+from safe_logging import setup_safe_logging
 import threading
 import time
 from pathlib import Path
 
+# Windows-specific async event loop configuration
+if platform.system() == "Windows":
+    # Use ProactorEventLoop on Windows for better I/O handling
+    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
 
 # Add project root to path
 project_root = Path(__file__).parent
 sys.path.insert(0, str(project_root))
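setup_safe_logging comes from the project-local safe_logging module, which is not included in this diff. As a minimal sketch of what such a helper might do, assuming its purpose is to keep Windows console logging from raising on odd characters or closed streams (only the function name is taken from the commit; the body below is an illustration, not the repository's code):

import logging
import sys

def setup_safe_logging(level=logging.INFO):
    """Hypothetical setup_safe_logging: install a root handler that never raises on emit."""
    class SafeStreamHandler(logging.StreamHandler):
        def emit(self, record):
            try:
                super().emit(record)
            except Exception:
                # Swallow encoding/closed-stream errors instead of crashing the caller
                self.handleError(record)

    handler = SafeStreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
    root = logging.getLogger()
    root.setLevel(level)
    root.handlers.clear()
    root.addHandler(handler)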
@@ -37,11 +43,25 @@ setup_safe_logging()
 logger = logging.getLogger(__name__)
 
 async def start_training_pipeline(orchestrator, trading_executor):
-    """Start the training pipeline in the background"""
+    """Start the training pipeline in the background with comprehensive error handling"""
     logger.info("=" * 70)
     logger.info("STARTING TRAINING PIPELINE WITH CLEAN DASHBOARD")
     logger.info("=" * 70)
 
+    # Set up async exception handler
+    def handle_async_exception(loop, context):
+        """Handle uncaught async exceptions"""
+        exception = context.get('exception')
+        if exception:
+            logger.error(f"Uncaught async exception: {exception}")
+            logger.error(f"Context: {context}")
+        else:
+            logger.error(f"Async error: {context.get('message', 'Unknown error')}")
+
+    # Get current event loop and set exception handler
+    loop = asyncio.get_running_loop()
+    loop.set_exception_handler(handle_async_exception)
+
     # Initialize checkpoint management
     checkpoint_manager = get_checkpoint_manager()
     training_integration = get_training_integration()
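The handler registered with loop.set_exception_handler only sees errors that asyncio cannot deliver anywhere else, such as exceptions raised inside loop callbacks or in tasks whose results are never awaited; exceptions from awaited coroutines still propagate normally. A small standalone illustration of when it fires (not part of this commit):

import asyncio

def handle_async_exception(loop, context):
    # Same signature the commit uses: context may carry 'exception' or just 'message'
    print("handler saw:", context.get("exception") or context.get("message"))

async def demo():
    loop = asyncio.get_running_loop()
    loop.set_exception_handler(handle_async_exception)
    # A failing callback is routed to the handler instead of killing the loop
    loop.call_soon(lambda: 1 / 0)
    await asyncio.sleep(0.1)

asyncio.run(demo())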
@@ -56,17 +76,23 @@ async def start_training_pipeline(orchestrator, trading_executor):
     }
 
     try:
-        # Start real-time processing (available in Enhanced orchestrator)
-        if hasattr(orchestrator, 'start_realtime_processing'):
-            await orchestrator.start_realtime_processing()
-            logger.info("Real-time processing started")
+        # Start real-time processing with error handling
+        try:
+            if hasattr(orchestrator, 'start_realtime_processing'):
+                await orchestrator.start_realtime_processing()
+                logger.info("Real-time processing started")
+        except Exception as e:
+            logger.error(f"Error starting real-time processing: {e}")
 
-        # Start COB integration (available in Enhanced orchestrator)
-        if hasattr(orchestrator, 'start_cob_integration'):
-            await orchestrator.start_cob_integration()
-            logger.info("COB integration started - 5-minute data matrix active")
-        else:
-            logger.info("COB integration not available")
+        # Start COB integration with error handling
+        try:
+            if hasattr(orchestrator, 'start_cob_integration'):
+                await orchestrator.start_cob_integration()
+                logger.info("COB integration started - 5-minute data matrix active")
+            else:
+                logger.info("COB integration not available")
+        except Exception as e:
+            logger.error(f"Error starting COB integration: {e}")
 
         # Main training loop
         iteration = 0
@@ -170,6 +196,31 @@ def start_clean_dashboard_with_training():
         orchestrator.trading_executor = trading_executor
         logger.info("Trading Executor connected to Orchestrator")
 
+        # Initialize system resource monitoring
+        from utils.system_monitor import start_system_monitoring
+        system_monitor = start_system_monitoring()
+
+        # Set up cleanup callback for memory management
+        def cleanup_callback():
+            """Custom cleanup for memory management"""
+            try:
+                # Clear orchestrator caches
+                if hasattr(orchestrator, 'recent_decisions'):
+                    for symbol in orchestrator.recent_decisions:
+                        if len(orchestrator.recent_decisions[symbol]) > 50:
+                            orchestrator.recent_decisions[symbol] = orchestrator.recent_decisions[symbol][-25:]
+
+                # Clear data provider caches
+                if hasattr(data_provider, 'clear_old_data'):
+                    data_provider.clear_old_data()
+
+                logger.info("Custom memory cleanup completed")
+            except Exception as e:
+                logger.error(f"Error in custom cleanup: {e}")
+
+        system_monitor.set_callbacks(cleanup=cleanup_callback)
+        logger.info("System resource monitoring started with memory cleanup")
 
         # Import clean dashboard
         from web.clean_dashboard import create_clean_dashboard
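utils.system_monitor is likewise a project module that this diff only calls into; from the usage above, start_system_monitoring() returns an object exposing set_callbacks(cleanup=...), with the callback invoked when resources run low. A rough sketch of an interface compatible with that call pattern, assuming a psutil-based RSS check and an arbitrary threshold (none of which is taken from the actual module):

import logging
import threading
import time

import psutil  # assumed dependency for memory measurement

logger = logging.getLogger(__name__)

class SystemMonitor:
    """Hypothetical monitor: runs in a daemon thread and triggers cleanup over a memory limit."""
    def __init__(self, memory_limit_mb=4096, interval_sec=30):
        self.memory_limit_mb = memory_limit_mb
        self.interval_sec = interval_sec
        self._cleanup_cb = None
        self._thread = threading.Thread(target=self._run, daemon=True)

    def set_callbacks(self, cleanup=None):
        self._cleanup_cb = cleanup

    def start(self):
        self._thread.start()
        return self

    def _run(self):
        while True:
            rss_mb = psutil.Process().memory_info().rss / (1024 * 1024)
            if rss_mb > self.memory_limit_mb and self._cleanup_cb:
                logger.warning(f"Memory {rss_mb:.0f} MB over limit, running cleanup")
                self._cleanup_cb()
            time.sleep(self.interval_sec)

def start_system_monitoring():
    return SystemMonitor().start()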
@@ -178,17 +229,39 @@ def start_clean_dashboard_with_training():
         dashboard = create_clean_dashboard(data_provider, orchestrator, trading_executor)
         logger.info("Clean Trading Dashboard created")
 
-        # Start training pipeline in background thread
+        # Add memory cleanup method to dashboard
+        def cleanup_dashboard_memory():
+            """Clean up dashboard memory caches"""
+            try:
+                if hasattr(dashboard, 'recent_decisions'):
+                    dashboard.recent_decisions = dashboard.recent_decisions[-50:] # Keep last 50
+                if hasattr(dashboard, 'closed_trades'):
+                    dashboard.closed_trades = dashboard.closed_trades[-100:] # Keep last 100
+                if hasattr(dashboard, 'tick_cache'):
+                    dashboard.tick_cache = dashboard.tick_cache[-1000:] # Keep last 1000
+                logger.debug("Dashboard memory cleanup completed")
+            except Exception as e:
+                logger.error(f"Error in dashboard memory cleanup: {e}")
+
+        # Set cleanup method on dashboard
+        dashboard.cleanup_memory = cleanup_dashboard_memory
+
+        # Start training pipeline in background thread with enhanced error handling
         def training_worker():
-            """Run training pipeline in background"""
+            """Run training pipeline in background with comprehensive error handling"""
             try:
                 asyncio.run(start_training_pipeline(orchestrator, trading_executor))
+            except KeyboardInterrupt:
+                logger.info("Training worker stopped by user")
             except Exception as e:
                 logger.error(f"Training worker error: {e}")
+                import traceback
+                logger.error(f"Training worker traceback: {traceback.format_exc()}")
+                # Don't exit - let main thread handle restart
 
         training_thread = threading.Thread(target=training_worker, daemon=True)
         training_thread.start()
-        logger.info("Training pipeline started in background")
+        logger.info("Training pipeline started in background with error handling")
 
         # Wait a moment for training to initialize
         time.sleep(3)
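One detail worth noting about the worker: asyncio.run() builds a brand-new event loop for whichever thread calls it, using the policy installed at import time, so on Windows the background training loop gets its own ProactorEventLoop that is entirely separate from the Dash server running in the main thread. A minimal sketch of that behaviour (illustration only, not repository code):

import asyncio
import platform
import threading

if platform.system() == "Windows":
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

async def report(tag):
    loop = asyncio.get_running_loop()
    # Each asyncio.run() call creates a distinct loop object for its calling thread
    print(f"{tag}: {type(loop).__name__} id={id(loop)}")

threading.Thread(target=lambda: asyncio.run(report("worker thread"))).start()
asyncio.run(report("main thread"))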
@@ -205,9 +278,15 @@ def start_clean_dashboard_with_training():
         else:
             logger.warning("Failed to start TensorBoard - training metrics will not be visualized")
 
-        # Start dashboard server (this blocks)
-        logger.info("Starting Clean Dashboard Server...")
-        dashboard.run_server(host='127.0.0.1', port=dashboard_port, debug=False)
+        # Start dashboard server with error handling (this blocks)
+        logger.info("Starting Clean Dashboard Server with error handling...")
+        try:
+            dashboard.run_server(host='127.0.0.1', port=dashboard_port, debug=False)
+        except Exception as e:
+            logger.error(f"Dashboard server error: {e}")
+            import traceback
+            logger.error(f"Dashboard server traceback: {traceback.format_exc()}")
+            raise # Re-raise to trigger main error handling
 
     except KeyboardInterrupt:
         logger.info("System stopped by user")
@@ -224,8 +303,23 @@ def start_clean_dashboard_with_training():
         sys.exit(1)
 
 def main():
-    """Main function"""
-    start_clean_dashboard_with_training()
+    """Main function with comprehensive error handling"""
+    try:
+        start_clean_dashboard_with_training()
+    except KeyboardInterrupt:
+        logger.info("Dashboard stopped by user (Ctrl+C)")
+        sys.exit(0)
+    except Exception as e:
+        logger.error(f"Critical error in main: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        sys.exit(1)
 
 if __name__ == "__main__":
+    # Ensure logging is flushed on exit
+    import atexit
+    def flush_logs():
+        logging.shutdown()
+    atexit.register(flush_logs)
+
     main()