diff --git a/NN/__pycache__/main.cpython-312.pyc b/NN/__pycache__/main.cpython-312.pyc deleted file mode 100644 index 7e8467b..0000000 Binary files a/NN/__pycache__/main.cpython-312.pyc and /dev/null differ diff --git a/NN/utils/__pycache__/__init__.cpython-312.pyc b/NN/utils/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index db32020..0000000 Binary files a/NN/utils/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/NN/utils/__pycache__/data_interface.cpython-312.pyc b/NN/utils/__pycache__/data_interface.cpython-312.pyc deleted file mode 100644 index 5c90249..0000000 Binary files a/NN/utils/__pycache__/data_interface.cpython-312.pyc and /dev/null differ diff --git a/config.yaml b/config.yaml index bd5bd44..15ef108 100644 --- a/config.yaml +++ b/config.yaml @@ -159,13 +159,13 @@ trading: # MEXC Trading API Configuration mexc_trading: enabled: true - trading_mode: simulation # simulation, testnet, live + trading_mode: live # simulation, testnet, live # Position sizing as percentage of account balance - base_position_percent: 5.0 # 5% base position of account - max_position_percent: 20.0 # 20% max position of account - min_position_percent: 2.0 # 2% min position of account - leverage: 50.0 # 50x leverage (adjustable in UI) + base_position_percent: 1 # 0.5% base position of account (MUCH SAFER) + max_position_percent: 5.0 # 2% max position of account (REDUCED) + min_position_percent: 0.5 # 0.2% min position of account (REDUCED) + leverage: 1.0 # 1x leverage (NO LEVERAGE FOR TESTING) simulation_account_usd: 100.0 # $100 simulation account balance # Risk management diff --git a/core/training_integration.py b/core/training_integration.py index a6d7a3d..ea1419a 100644 --- a/core/training_integration.py +++ b/core/training_integration.py @@ -229,9 +229,12 @@ class TrainingIntegration: # Truncate features = features[:50] + # Get the model's device to ensure tensors are on the same device + model_device = next(cnn_model.parameters()).device + # Create tensors - features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device) - target_tensor = torch.LongTensor([target]).to(device) + features_tensor = torch.FloatTensor(features).unsqueeze(0).to(model_device) + target_tensor = torch.LongTensor([target]).to(model_device) # Training step cnn_model.train() diff --git a/enhanced_realtime_training.py b/enhanced_realtime_training.py index f1e2089..9ad86ec 100644 --- a/enhanced_realtime_training.py +++ b/enhanced_realtime_training.py @@ -1489,7 +1489,20 @@ class EnhancedRealtimeTrainingSystem: outputs = model(features_tensor) - loss = criterion(outputs, targets_tensor) + # Extract logits from model output (model returns a dictionary) + if isinstance(outputs, dict): + logits = outputs['logits'] + elif isinstance(outputs, tuple): + logits = outputs[0] # First element is usually logits + else: + logits = outputs + + # Ensure logits is a tensor + if not isinstance(logits, torch.Tensor): + logger.error(f"CNN output is not a tensor: {type(logits)}") + return 0.0 + + loss = criterion(logits, targets_tensor) loss.backward() optimizer.step() diff --git a/run_clean_dashboard.py b/run_clean_dashboard.py index 4790260..328159b 100644 --- a/run_clean_dashboard.py +++ b/run_clean_dashboard.py @@ -1,201 +1,121 @@ #!/usr/bin/env python3 """ -Run Clean Trading Dashboard with Full Training Pipeline -Integrated system with both training loop and clean web dashboard +Clean Trading Dashboard Runner with Enhanced Stability and Error Handling """ -import os -# Fix OpenMP library conflicts before importing other modules -os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' -os.environ['OMP_NUM_THREADS'] = '4' - -import asyncio -import logging import sys -import threading +import logging +import traceback +import gc import time +import psutil +import torch from pathlib import Path -# Add project root to path -project_root = Path(__file__).parent -sys.path.insert(0, str(project_root)) - -from core.config import get_config, setup_logging -from core.data_provider import DataProvider - -# Import checkpoint management -from utils.checkpoint_manager import get_checkpoint_manager -from utils.training_integration import get_training_integration - # Setup logging -setup_logging() +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) -async def start_training_pipeline(orchestrator, trading_executor): - """Start the training pipeline in the background""" - logger.info("=" * 70) - logger.info("STARTING TRAINING PIPELINE WITH CLEAN DASHBOARD") - logger.info("=" * 70) - - # Initialize checkpoint management - checkpoint_manager = get_checkpoint_manager() - training_integration = get_training_integration() - - # Training statistics - training_stats = { - 'iteration_count': 0, - 'total_decisions': 0, - 'successful_trades': 0, - 'best_performance': 0.0, - 'last_checkpoint_iteration': 0 - } - - try: - # Start real-time processing (available in Enhanced orchestrator) - if hasattr(orchestrator, 'start_realtime_processing'): - await orchestrator.start_realtime_processing() - logger.info("Real-time processing started") - - # Start COB integration (available in Enhanced orchestrator) - if hasattr(orchestrator, 'start_cob_integration'): - await orchestrator.start_cob_integration() - logger.info("COB integration started - 5-minute data matrix active") - else: - logger.info("COB integration not available") - - # Main training loop - iteration = 0 - last_checkpoint_time = time.time() - - while True: - try: - iteration += 1 - training_stats['iteration_count'] = iteration - - # Get symbols to process - symbols = orchestrator.symbols if hasattr(orchestrator, 'symbols') else ['ETH/USDT'] - - # Process each symbol - for symbol in symbols: - try: - # Make trading decision (this triggers model training) - decision = await orchestrator.make_trading_decision(symbol) - if decision: - training_stats['total_decisions'] += 1 - logger.debug(f"[{symbol}] Decision: {decision.action} @ {decision.confidence:.1%}") - - except Exception as e: - logger.warning(f"Error processing {symbol}: {e}") - - # Status logging every 100 iterations - if iteration % 100 == 0: - current_time = time.time() - elapsed = current_time - last_checkpoint_time - - logger.info(f"[TRAINING] Iteration {iteration}, Decisions: {training_stats['total_decisions']}, Time: {elapsed:.1f}s") - - # Models will save their own checkpoints when performance improves - training_stats['last_checkpoint_iteration'] = iteration - last_checkpoint_time = current_time - - # Brief pause to prevent overwhelming the system - await asyncio.sleep(0.1) # 100ms between iterations - - except Exception as e: - logger.error(f"Training loop error: {e}") - await asyncio.sleep(5) # Wait longer on error - - except Exception as e: - logger.error(f"Training pipeline error: {e}") - import traceback - logger.error(traceback.format_exc()) +def clear_gpu_memory(): + """Clear GPU memory cache""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() -def start_clean_dashboard_with_training(): - """Start clean dashboard with full training pipeline""" - try: - logger.info("=" * 80) - logger.info("CLEAN TRADING DASHBOARD + FULL TRAINING PIPELINE") - logger.info("=" * 80) - logger.info("Features: Real-time Training, COB Integration, Clean UI") - logger.info("Universal Data Stream: ENABLED") - logger.info("Neural Decision Fusion: ENABLED") - logger.info("COB Integration: ENABLED") - logger.info("GPU Training: ENABLED") - logger.info("Multi-symbol: ETH/USDT, BTC/USDT") - - # Get port from environment or use default - dashboard_port = int(os.environ.get('DASHBOARD_PORT', '8051')) - logger.info(f"Dashboard: http://127.0.0.1:{dashboard_port}") - logger.info("=" * 80) - - # Check environment variables - enable_universal_stream = os.environ.get('ENABLE_UNIVERSAL_DATA_STREAM', '1') == '1' - enable_nn_fusion = os.environ.get('ENABLE_NN_DECISION_FUSION', '1') == '1' - enable_cob = os.environ.get('ENABLE_COB_INTEGRATION', '1') == '1' - - logger.info(f"Universal Data Stream: {'ENABLED' if enable_universal_stream else 'DISABLED'}") - logger.info(f"Neural Decision Fusion: {'ENABLED' if enable_nn_fusion else 'DISABLED'}") - logger.info(f"COB Integration: {'ENABLED' if enable_cob else 'DISABLED'}") - - # Get configuration - config = get_config() - - # Initialize core components - from core.data_provider import DataProvider - from core.orchestrator import TradingOrchestrator - from core.trading_executor import TradingExecutor - - # Create data provider - data_provider = DataProvider() - - # Create enhanced orchestrator with COB integration - stable and efficient - orchestrator = TradingOrchestrator(data_provider, enhanced_rl_training=True) - logger.info("Enhanced Trading Orchestrator created with COB integration") - - # Create trading executor - trading_executor = TradingExecutor() - - # Import clean dashboard - from web.clean_dashboard import create_clean_dashboard - - # Create clean dashboard - dashboard = create_clean_dashboard( - data_provider=data_provider, - orchestrator=orchestrator, - trading_executor=trading_executor - ) - logger.info("Clean Trading Dashboard created") - - # Start training pipeline in background thread - def training_worker(): - """Run training pipeline in background""" - try: - asyncio.run(start_training_pipeline(orchestrator, trading_executor)) - except Exception as e: - logger.error(f"Training worker error: {e}") - - training_thread = threading.Thread(target=training_worker, daemon=True) - training_thread.start() - logger.info("Training pipeline started in background") - - # Wait a moment for training to initialize - time.sleep(3) - - # Start dashboard server (this blocks) - logger.info(" Starting Clean Dashboard Server...") - dashboard.run_server(host='127.0.0.1', port=dashboard_port, debug=False) - - except KeyboardInterrupt: - logger.info("System stopped by user") - except Exception as e: - logger.error(f"Error running clean dashboard with training: {e}") - import traceback - traceback.print_exc() - sys.exit(1) +def check_system_resources(): + """Check if system has enough resources""" + available_ram = psutil.virtual_memory().available / 1024**3 + if available_ram < 2.0: # Less than 2GB available + logger.warning(f"Low RAM: {available_ram:.1f} GB available") + gc.collect() + clear_gpu_memory() + return False + return True -def main(): - """Main function""" - start_clean_dashboard_with_training() +def run_dashboard_with_recovery(): + """Run dashboard with automatic error recovery""" + max_retries = 3 + retry_count = 0 + + while retry_count < max_retries: + try: + logger.info(f"Starting Clean Trading Dashboard (attempt {retry_count + 1}/{max_retries})") + + # Check system resources + if not check_system_resources(): + logger.warning("System resources low, waiting 30 seconds...") + time.sleep(30) + continue + + # Import here to avoid memory issues on restart + from core.data_provider import DataProvider + from core.orchestrator import TradingOrchestrator + from core.trading_executor import TradingExecutor + from web.clean_dashboard import create_clean_dashboard + + logger.info("Creating data provider...") + data_provider = DataProvider() + + logger.info("Creating trading orchestrator...") + orchestrator = TradingOrchestrator( + data_provider=data_provider, + enhanced_rl_training=True + ) + + logger.info("Creating trading executor...") + trading_executor = TradingExecutor() + + logger.info("Creating clean dashboard...") + dashboard = create_clean_dashboard(data_provider, orchestrator, trading_executor) + + logger.info("Dashboard created successfully") + logger.info("=== Clean Trading Dashboard Status ===") + logger.info("- Data Provider: Active") + logger.info("- Trading Orchestrator: Active") + logger.info("- Trading Executor: Active") + logger.info("- Enhanced Training: Active") + logger.info("- Dashboard: Ready") + logger.info("=======================================") + + # Start the dashboard server with error handling + try: + logger.info("Starting dashboard server on http://127.0.0.1:8050") + dashboard.run_server(host='127.0.0.1', port=8050, debug=False) + except KeyboardInterrupt: + logger.info("Dashboard stopped by user") + break + except Exception as e: + logger.error(f"Dashboard server error: {e}") + logger.error(traceback.format_exc()) + raise + + except Exception as e: + logger.error(f"Critical error in dashboard: {e}") + logger.error(traceback.format_exc()) + + retry_count += 1 + if retry_count < max_retries: + logger.info(f"Attempting recovery... ({retry_count}/{max_retries})") + + # Cleanup + gc.collect() + clear_gpu_memory() + + # Wait before retry + wait_time = 30 * retry_count # Exponential backoff + logger.info(f"Waiting {wait_time} seconds before retry...") + time.sleep(wait_time) + else: + logger.error("Max retries reached. Exiting.") + sys.exit(1) if __name__ == "__main__": - main() \ No newline at end of file + try: + run_dashboard_with_recovery() + except KeyboardInterrupt: + logger.info("Application stopped by user") + sys.exit(0) + except Exception as e: + logger.error(f"Fatal error: {e}") + logger.error(traceback.format_exc()) + sys.exit(1) \ No newline at end of file diff --git a/web/clean_dashboard.py b/web/clean_dashboard.py index d916a45..669df70 100644 --- a/web/clean_dashboard.py +++ b/web/clean_dashboard.py @@ -5478,15 +5478,18 @@ class CleanTradingDashboard: import torch device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + # Get the model's device to ensure tensors are on the same device + model_device = next(model.parameters()).device + # Handle different input shapes for different CNN models if hasattr(model, 'input_shape'): # EnhancedCNN model - features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device) + features_tensor = torch.FloatTensor(features).unsqueeze(0).to(model_device) else: # Basic CNN model - reshape appropriately - features_tensor = torch.FloatTensor(features).unsqueeze(0).unsqueeze(0).to(device) + features_tensor = torch.FloatTensor(features).unsqueeze(0).unsqueeze(0).to(model_device) - target_tensor = torch.LongTensor([target]).to(device) + target_tensor = torch.LongTensor([target]).to(model_device) # Set model to training mode and zero gradients model.train() @@ -5605,10 +5608,11 @@ class CleanTradingDashboard: if hasattr(network, 'forward'): import torch import torch.nn as nn - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device) - action_target_tensor = torch.LongTensor([action_target]).to(device) - confidence_target_tensor = torch.FloatTensor([confidence_target]).to(device) + # Get the model's device to ensure tensors are on the same device + model_device = next(network.parameters()).device + features_tensor = torch.FloatTensor(features).unsqueeze(0).to(model_device) + action_target_tensor = torch.LongTensor([action_target]).to(model_device) + confidence_target_tensor = torch.FloatTensor([confidence_target]).to(model_device) network.train() network_output = network(features_tensor)