checkpoint manager and handling

Author: Dobromir Popov
Date: 2025-06-24 21:59:23 +03:00
parent 706eb13912
commit ab8c94d735
8 changed files with 1170 additions and 29 deletions

main.py (122 changed lines)

@@ -32,6 +32,10 @@ sys.path.insert(0, str(project_root))
from core.config import get_config, setup_logging, Config
from core.data_provider import DataProvider

# Import checkpoint management
from utils.checkpoint_manager import get_checkpoint_manager
from utils.training_integration import get_training_integration

logger = logging.getLogger(__name__)

async def run_web_dashboard():
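The two new imports reference utils/checkpoint_manager and utils/training_integration, neither of which appears in this diff. A minimal sketch of the checkpoint-manager side, assuming a module-level singleton, checkpoint files kept under NN/models/saved (the directory main() creates later in this commit), and the two get_checkpoint_stats() keys the training loop logs, could look like the following; the .pt glob and file layout are assumptions, not the actual module:

# Hypothetical sketch of utils/checkpoint_manager as used by main.py; not the real implementation.
from pathlib import Path

_CHECKPOINT_DIR = Path("NN/models/saved")   # created by main() in this commit
_manager = None

class CheckpointManager:
    def __init__(self, checkpoint_dir: Path = _CHECKPOINT_DIR):
        self.checkpoint_dir = checkpoint_dir
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

    def get_checkpoint_stats(self) -> dict:
        """Return the aggregate stats the training loop logs every 10 iterations."""
        files = list(self.checkpoint_dir.glob("**/*.pt"))
        total_bytes = sum(f.stat().st_size for f in files)
        return {
            "total_checkpoints": len(files),
            "total_size_mb": total_bytes / (1024 * 1024),
        }

def get_checkpoint_manager() -> CheckpointManager:
    """Module-level singleton, matching the repeated get_checkpoint_manager() calls in main.py."""
    global _manager
    if _manager is None:
        _manager = CheckpointManager()
    return _manager

get_training_integration() is presumably a similar singleton that wires this manager into the training code; its interface is not visible in this commit.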
@@ -80,6 +84,11 @@ async def run_web_dashboard():
        model_registry = {}
        logger.warning("Model registry not available, using empty registry")

    # Initialize checkpoint management
    checkpoint_manager = get_checkpoint_manager()
    training_integration = get_training_integration()
    logger.info("Checkpoint management initialized for training pipeline")

    # Create streamlined orchestrator with 2-action system and always-invested approach
    orchestrator = EnhancedTradingOrchestrator(
        data_provider=data_provider,
@@ -90,6 +99,9 @@ async def run_web_dashboard():
logger.info("Enhanced Trading Orchestrator with 2-Action System initialized")
logger.info("Always Invested: Learning to spot high risk/reward setups")
# Checkpoint management will be handled in the training loop
logger.info("Checkpoint management will be initialized in training loop")
# Start COB integration for real-time market microstructure
try:
# Create and start COB integration task
@@ -162,6 +174,10 @@ def start_web_ui(port=8051):
        except ImportError:
            model_registry = {}

        # Initialize checkpoint management for dashboard
        dashboard_checkpoint_manager = get_checkpoint_manager()
        dashboard_training_integration = get_training_integration()

        # Create enhanced orchestrator for the dashboard (WITH COB integration)
        dashboard_orchestrator = EnhancedTradingOrchestrator(
            data_provider=data_provider,
@@ -181,6 +197,7 @@ def start_web_ui(port=8051):
logger.info("Enhanced TradingDashboard created successfully")
logger.info("Features: Live trading, COB visualization, RL training monitoring, Position management")
logger.info("✅ Checkpoint management integrated for training persistence")
# Run the dashboard server (COB integration will start automatically)
dashboard.app.run(host='127.0.0.1', port=port, debug=False, use_reloader=False)
@@ -191,11 +208,24 @@ def start_web_ui(port=8051):
        logger.error(traceback.format_exc())

async def start_training_loop(orchestrator, trading_executor):
-    """Start the main training and monitoring loop"""
+    """Start the main training and monitoring loop with checkpoint management"""
    logger.info("=" * 70)
    logger.info("STARTING ENHANCED TRAINING LOOP WITH COB INTEGRATION")
    logger.info("=" * 70)

    # Initialize checkpoint management for training loop
    checkpoint_manager = get_checkpoint_manager()
    training_integration = get_training_integration()

    # Training statistics for checkpoint management
    training_stats = {
        'iteration_count': 0,
        'total_decisions': 0,
        'successful_trades': 0,
        'best_performance': 0.0,
        'last_checkpoint_iteration': 0
    }

    try:
        # Start real-time processing
        await orchestrator.start_realtime_processing()
@@ -204,27 +234,88 @@ async def start_training_loop(orchestrator, trading_executor):
        iteration = 0
        while True:
            iteration += 1
            training_stats['iteration_count'] = iteration
            logger.info(f"Training iteration {iteration}")

            # Make coordinated decisions (this triggers CNN and RL training)
            decisions = await orchestrator.make_coordinated_decisions()

            # Process decisions and collect training metrics
            iteration_decisions = 0
            iteration_performance = 0.0

            # Log decisions and performance
            for symbol, decision in decisions.items():
                if decision:
                    iteration_decisions += 1
                    logger.info(f"{symbol}: {decision.action} (confidence: {decision.confidence:.3f})")

                    # Track performance for checkpoint management
                    iteration_performance += decision.confidence

                    # Execute if confidence is high enough
                    if decision.confidence > 0.7:
                        logger.info(f"Executing {symbol}: {decision.action}")
                        training_stats['successful_trades'] += 1
                        # trading_executor.execute_action(decision)

            # Update training statistics
            training_stats['total_decisions'] += iteration_decisions
            if iteration_performance > training_stats['best_performance']:
                training_stats['best_performance'] = iteration_performance

            # Save checkpoint every 50 iterations or when performance improves significantly
            should_save_checkpoint = (
                iteration % 50 == 0 or  # Regular interval
                iteration_performance > training_stats['best_performance'] * 1.1 or  # 10% improvement
                iteration - training_stats['last_checkpoint_iteration'] >= 100  # Force save every 100 iterations
            )

            if should_save_checkpoint:
                try:
                    # Create performance metrics for checkpoint
                    performance_metrics = {
                        'avg_confidence': iteration_performance / max(iteration_decisions, 1),
                        'success_rate': training_stats['successful_trades'] / max(training_stats['total_decisions'], 1),
                        'total_decisions': training_stats['total_decisions'],
                        'iteration': iteration
                    }

                    # Save orchestrator state (if it has models)
                    if hasattr(orchestrator, 'rl_agent') and orchestrator.rl_agent:
                        saved = orchestrator.rl_agent.save_checkpoint(iteration_performance)
                        if saved:
                            logger.info(f"✅ RL Agent checkpoint saved at iteration {iteration}")

                    if hasattr(orchestrator, 'cnn_model') and orchestrator.cnn_model:
                        # Simulate CNN checkpoint save
                        logger.info(f"✅ CNN Model training state saved at iteration {iteration}")

                    if hasattr(orchestrator, 'extrema_trainer') and orchestrator.extrema_trainer:
                        saved = orchestrator.extrema_trainer.save_checkpoint()
                        if saved:
                            logger.info(f"✅ ExtremaTrainer checkpoint saved at iteration {iteration}")

                    training_stats['last_checkpoint_iteration'] = iteration
                    logger.info(f"📊 Checkpoint management completed for iteration {iteration}")

                except Exception as e:
                    logger.warning(f"Checkpoint saving failed at iteration {iteration}: {e}")

            # Log performance metrics every 10 iterations
            if iteration % 10 == 0:
                metrics = orchestrator.get_performance_metrics()
                logger.info(f"Performance metrics: {metrics}")

                # Log training statistics
                logger.info(f"Training stats: {training_stats}")

                # Log checkpoint statistics
                checkpoint_stats = checkpoint_manager.get_checkpoint_stats()
                logger.info(f"Checkpoints: {checkpoint_stats['total_checkpoints']} total, "
                            f"{checkpoint_stats['total_size_mb']:.2f} MB")

                # Log COB integration status
                for symbol in orchestrator.symbols:
                    cob_features = orchestrator.latest_cob_features.get(symbol)
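The checkpoint block above assumes that each model hanging off the orchestrator (rl_agent, extrema_trainer) exposes a save_checkpoint method that takes an optional performance score plus a force_save flag and returns True when a file was actually written. That method is not part of this diff; a plausible sketch, reusing the assumed CheckpointManager above and an assumed policy_net attribute on the agent, is:

# Hypothetical save_checkpoint as called by the training loop; the real agent code is not in this diff.
import torch
from utils.checkpoint_manager import get_checkpoint_manager

class CheckpointedAgentMixin:
    """Mixin sketch; the concrete agent is expected to define self.policy_net (assumed name)."""
    best_performance: float = float("-inf")

    def save_checkpoint(self, performance: float = 0.0, force_save: bool = False) -> bool:
        """Write a checkpoint when performance improves, or unconditionally on force_save."""
        if not force_save and performance <= self.best_performance:
            return False                                       # nothing better to keep
        self.best_performance = max(self.best_performance, performance)
        manager = get_checkpoint_manager()
        path = manager.checkpoint_dir / f"rl_agent_{performance:.4f}.pt"
        torch.save(self.policy_net.state_dict(), path)         # policy_net is an assumed attribute
        return True

Under this reading, the 0.0 score passed in the shutdown path below relies on force_save=True to bypass the improvement check.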
@@ -242,9 +333,29 @@ async def start_training_loop(orchestrator, trading_executor):
        import traceback
        logger.error(traceback.format_exc())

    finally:
        # Save final checkpoints before shutdown
        try:
            logger.info("Saving final checkpoints before shutdown...")

            if hasattr(orchestrator, 'rl_agent') and orchestrator.rl_agent:
                orchestrator.rl_agent.save_checkpoint(0.0, force_save=True)
                logger.info("✅ Final RL Agent checkpoint saved")

            if hasattr(orchestrator, 'extrema_trainer') and orchestrator.extrema_trainer:
                orchestrator.extrema_trainer.save_checkpoint(force_save=True)
                logger.info("✅ Final ExtremaTrainer checkpoint saved")

            # Log final checkpoint statistics
            final_stats = checkpoint_manager.get_checkpoint_stats()
            logger.info(f"📊 Final checkpoint stats: {final_stats['total_checkpoints']} checkpoints, "
                        f"{final_stats['total_size_mb']:.2f} MB total")

        except Exception as e:
            logger.warning(f"Error saving final checkpoints: {e}")

        await orchestrator.stop_realtime_processing()
        await orchestrator.stop_cob_integration()
-        logger.info("Training loop stopped")
+        logger.info("Training loop stopped with checkpoint management")

async def main():
    """Main entry point with both training loop and web dashboard"""
@@ -258,7 +369,9 @@ async def main():
    args = parser.parse_args()

-    # Setup logging
+    # Setup logging and ensure directories exist
    Path("logs").mkdir(exist_ok=True)
    Path("NN/models/saved").mkdir(parents=True, exist_ok=True)
    setup_logging()

    try:
@@ -271,6 +384,9 @@ async def main():
logger.info("Always Invested: Learning to spot high risk/reward setups")
logger.info("Flow: Data -> COB -> Indicators -> CNN -> RL -> Orchestrator -> Execution")
logger.info("Main Dashboard: Live trading, RL monitoring, Position management")
logger.info("🔄 Checkpoint Management: Automatic training state persistence")
# logger.info("📊 W&B Integration: Optional experiment tracking")
logger.info("💾 Model Rotation: Keep best 5 checkpoints per model")
logger.info("=" * 70)
# Start main trading dashboard UI in a separate thread
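The "Keep best 5 checkpoints per model" line in the startup banner implies a rotation policy, but no rotation code appears in this commit. One way it could work, assuming the score-in-filename convention used in the sketches above, is:

# Hypothetical rotation helper; the actual policy would live in utils/checkpoint_manager, not shown here.
from pathlib import Path

def rotate_checkpoints(checkpoint_dir: Path, model_prefix: str, keep_best: int = 5) -> None:
    """Delete all but the keep_best highest-scoring checkpoints for one model."""
    def score(path: Path) -> float:
        # Filenames are assumed to end with the performance score, e.g. rl_agent_0.8731.pt
        try:
            return float(path.stem.rsplit("_", 1)[-1])
        except ValueError:
            return float("-inf")

    checkpoints = sorted(checkpoint_dir.glob(f"{model_prefix}_*.pt"), key=score, reverse=True)
    for stale in checkpoints[keep_best:]:
        stale.unlink(missing_ok=True)

Calling something like rotate_checkpoints(Path("NN/models/saved"), "rl_agent") after each successful save would keep the totals reported by get_checkpoint_stats() bounded.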