stability fixes, lower updates
This commit is contained in:
@ -16,11 +16,17 @@ matplotlib.use('Agg') # Use non-interactive Agg backend
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
import platform
|
||||
from safe_logging import setup_safe_logging
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Windows-specific async event loop configuration
if platform.system() == "Windows":
    # Use ProactorEventLoop on Windows for better I/O handling.
    # The event-loop policy API is deprecated in recent Python releases and
    # scheduled for removal; guard the call so that start-up does not crash
    # with AttributeError on interpreters that no longer provide it.
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
    except AttributeError:
        # Policy API unavailable: keep the interpreter's default event loop.
        pass

# Add project root to path so that imports resolve relative to this file's
# directory regardless of the current working directory.
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
|
||||
@ -37,11 +43,25 @@ setup_safe_logging()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def start_training_pipeline(orchestrator, trading_executor):
|
||||
"""Start the training pipeline in the background"""
|
||||
"""Start the training pipeline in the background with comprehensive error handling"""
|
||||
logger.info("=" * 70)
|
||||
logger.info("STARTING TRAINING PIPELINE WITH CLEAN DASHBOARD")
|
||||
logger.info("=" * 70)
|
||||
|
||||
# Set up async exception handler
|
||||
def handle_async_exception(loop, context):
    """Log an error reported to the event loop's exception handler.

    Args:
        loop: Event loop that reported the error (not used here).
        context: Error context mapping; only the ``'exception'`` and
            ``'message'`` keys are read.
    """
    exception = context.get('exception')
    if exception is not None:
        # Pass the exception object as exc_info so the log record carries the
        # traceback of the failed task, not only its string representation.
        logger.error(f"Uncaught async exception: {exception}", exc_info=exception)
        logger.error(f"Context: {context}")
    else:
        logger.error(f"Async error: {context.get('message', 'Unknown error')}")
|
||||
|
||||
# Get current event loop and set exception handler
|
||||
loop = asyncio.get_running_loop()
|
||||
loop.set_exception_handler(handle_async_exception)
|
||||
|
||||
# Initialize checkpoint management
|
||||
checkpoint_manager = get_checkpoint_manager()
|
||||
training_integration = get_training_integration()
|
||||
@ -56,17 +76,23 @@ async def start_training_pipeline(orchestrator, trading_executor):
|
||||
}
|
||||
|
||||
try:
|
||||
# Start real-time processing (available in Enhanced orchestrator)
|
||||
if hasattr(orchestrator, 'start_realtime_processing'):
|
||||
await orchestrator.start_realtime_processing()
|
||||
logger.info("Real-time processing started")
|
||||
# Start real-time processing with error handling
|
||||
try:
|
||||
if hasattr(orchestrator, 'start_realtime_processing'):
|
||||
await orchestrator.start_realtime_processing()
|
||||
logger.info("Real-time processing started")
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting real-time processing: {e}")
|
||||
|
||||
# Start COB integration (available in Enhanced orchestrator)
|
||||
if hasattr(orchestrator, 'start_cob_integration'):
|
||||
await orchestrator.start_cob_integration()
|
||||
logger.info("COB integration started - 5-minute data matrix active")
|
||||
else:
|
||||
logger.info("COB integration not available")
|
||||
# Start COB integration with error handling
|
||||
try:
|
||||
if hasattr(orchestrator, 'start_cob_integration'):
|
||||
await orchestrator.start_cob_integration()
|
||||
logger.info("COB integration started - 5-minute data matrix active")
|
||||
else:
|
||||
logger.info("COB integration not available")
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting COB integration: {e}")
|
||||
|
||||
# Main training loop
|
||||
iteration = 0
|
||||
@ -170,6 +196,31 @@ def start_clean_dashboard_with_training():
|
||||
orchestrator.trading_executor = trading_executor
|
||||
logger.info("Trading Executor connected to Orchestrator")
|
||||
|
||||
# Initialize system resource monitoring
|
||||
from utils.system_monitor import start_system_monitoring
|
||||
system_monitor = start_system_monitoring()
|
||||
|
||||
# Set up cleanup callback for memory management
|
||||
def cleanup_callback():
    """Trim orchestrator decision history and ask the data provider to purge.

    Any error raised during cleanup is logged and swallowed.
    """
    try:
        # Per-symbol decision history: once an entry exceeds 50 items,
        # keep only the 25 most recent ones.
        if hasattr(orchestrator, 'recent_decisions'):
            decisions_by_symbol = orchestrator.recent_decisions
            for symbol in decisions_by_symbol:
                history = decisions_by_symbol[symbol]
                if len(history) > 50:
                    decisions_by_symbol[symbol] = history[-25:]

        # The data provider is only purged when it exposes clear_old_data()
        if hasattr(data_provider, 'clear_old_data'):
            data_provider.clear_old_data()

        logger.info("Custom memory cleanup completed")
    except Exception as exc:
        logger.error(f"Error in custom cleanup: {exc}")
|
||||
|
||||
system_monitor.set_callbacks(cleanup=cleanup_callback)
|
||||
logger.info("System resource monitoring started with memory cleanup")
|
||||
|
||||
# Import clean dashboard
|
||||
from web.clean_dashboard import create_clean_dashboard
|
||||
|
||||
@ -178,17 +229,39 @@ def start_clean_dashboard_with_training():
|
||||
dashboard = create_clean_dashboard(data_provider, orchestrator, trading_executor)
|
||||
logger.info("Clean Trading Dashboard created")
|
||||
|
||||
# Start training pipeline in background thread
|
||||
# Add memory cleanup method to dashboard
|
||||
def cleanup_dashboard_memory():
    """Trim the dashboard's caches down to their most recent entries.

    Attributes that the dashboard does not have are skipped. Any error is
    logged and swallowed.
    """
    # (attribute name, number of trailing entries to keep)
    retention = (
        ('recent_decisions', 50),
        ('closed_trades', 100),
        ('tick_cache', 1000),
    )
    try:
        for attr_name, keep in retention:
            if hasattr(dashboard, attr_name):
                setattr(dashboard, attr_name, getattr(dashboard, attr_name)[-keep:])
        logger.debug("Dashboard memory cleanup completed")
    except Exception as exc:
        logger.error(f"Error in dashboard memory cleanup: {exc}")
|
||||
|
||||
# Set cleanup method on dashboard
|
||||
dashboard.cleanup_memory = cleanup_dashboard_memory
|
||||
|
||||
# Start training pipeline in background thread with enhanced error handling
|
||||
def training_worker():
    """Run the training pipeline to completion via ``asyncio.run``.

    Used as the target of the background training thread. Errors are logged
    with their traceback and not re-raised, so a pipeline failure ends this
    worker without propagating further.
    """
    try:
        asyncio.run(start_training_pipeline(orchestrator, trading_executor))
    except KeyboardInterrupt:
        logger.info("Training worker stopped by user")
    except Exception as e:
        logger.error(f"Training worker error: {e}")
        import traceback
        logger.error(f"Training worker traceback: {traceback.format_exc()}")
        # Deliberately do not exit the process here.
        # NOTE(review): the original comment says the main thread handles
        # restart, but no restart logic is visible in this view - confirm.
|
||||
|
||||
training_thread = threading.Thread(target=training_worker, daemon=True)
|
||||
training_thread.start()
|
||||
logger.info("Training pipeline started in background")
|
||||
logger.info("Training pipeline started in background with error handling")
|
||||
|
||||
# Wait a moment for training to initialize
|
||||
time.sleep(3)
|
||||
@ -205,9 +278,15 @@ def start_clean_dashboard_with_training():
|
||||
else:
|
||||
logger.warning("Failed to start TensorBoard - training metrics will not be visualized")
|
||||
|
||||
# Start dashboard server (this blocks)
|
||||
logger.info(" Starting Clean Dashboard Server...")
|
||||
dashboard.run_server(host='127.0.0.1', port=dashboard_port, debug=False)
|
||||
# Start dashboard server with error handling (this blocks)
|
||||
logger.info("Starting Clean Dashboard Server with error handling...")
|
||||
try:
|
||||
dashboard.run_server(host='127.0.0.1', port=dashboard_port, debug=False)
|
||||
except Exception as e:
|
||||
logger.error(f"Dashboard server error: {e}")
|
||||
import traceback
|
||||
logger.error(f"Dashboard server traceback: {traceback.format_exc()}")
|
||||
raise # Re-raise to trigger main error handling
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("System stopped by user")
|
||||
@ -224,8 +303,23 @@ def start_clean_dashboard_with_training():
|
||||
sys.exit(1)
|
||||
|
||||
def main():
    """Run the dashboard/training entry point and map failures to exit codes.

    Exits with status 0 when interrupted with Ctrl+C. Exits with status 1 on
    any other exception, after logging the error and its traceback.
    """
    # The entry point is invoked exactly once, inside the try block, so that
    # every failure goes through the handlers below.
    try:
        start_clean_dashboard_with_training()
    except KeyboardInterrupt:
        logger.info("Dashboard stopped by user (Ctrl+C)")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Critical error in main: {e}")
        import traceback
        logger.error(traceback.format_exc())
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    import atexit

    # Ensure logging is flushed on exit: logging.shutdown() flushes and
    # closes every registered handler.
    atexit.register(logging.shutdown)

    main()
|
Reference in New Issue
Block a user