tweaks, try live trading

2025-07-08 01:33:22 +03:00
parent 9cd2d5d8a4
commit 4ab7bc1846
8 changed files with 141 additions and 201 deletions
--- a/NN/pycache/main.cpython-312.pyc
+++ b/NN/pycache/main.cpython-312.pyc
--- a/NN/utils/pycache/init.cpython-312.pyc
+++ b/NN/utils/pycache/init.cpython-312.pyc
--- a/NN/utils/pycache/data_interface.cpython-312.pyc
+++ b/NN/utils/pycache/data_interface.cpython-312.pyc
--- a/config.yaml
+++ b/config.yaml
@@ -159,13 +159,13 @@ trading:
 # MEXC Trading API Configuration
 mexc_trading:
  enabled: true
-  trading_mode: simulation  # simulation, testnet, live
+  trading_mode: live  # simulation, testnet, live
  
  # Position sizing as percentage of account balance
-  base_position_percent: 5.0     # 5% base position of account
-  max_position_percent: 20.0     # 20% max position of account
-  min_position_percent: 2.0      # 2% min position of account
-  leverage: 50.0                 # 50x leverage (adjustable in UI)
+  base_position_percent: 1     # 0.5% base position of account (MUCH SAFER)
+  max_position_percent: 5.0      # 2% max position of account (REDUCED)
+  min_position_percent: 0.5      # 0.2% min position of account (REDUCED)
+  leverage: 1.0                  # 1x leverage (NO LEVERAGE FOR TESTING)
  simulation_account_usd: 100.0  # $100 simulation account balance
  
  # Risk management
--- a/core/training_integration.py
+++ b/core/training_integration.py
@@ -229,9 +229,12 @@ class TrainingIntegration:
                    # Truncate
                    features = features[:50]
                
+                # Get the model's device to ensure tensors are on the same device
+                model_device = next(cnn_model.parameters()).device
+                
                # Create tensors
-                features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
-                target_tensor = torch.LongTensor([target]).to(device)
+                features_tensor = torch.FloatTensor(features).unsqueeze(0).to(model_device)
+                target_tensor = torch.LongTensor([target]).to(model_device)
                
                # Training step
                cnn_model.train()
--- a/enhanced_realtime_training.py
+++ b/enhanced_realtime_training.py
@@ -1489,7 +1489,20 @@ class EnhancedRealtimeTrainingSystem:
            
            outputs = model(features_tensor)
            
-            loss = criterion(outputs, targets_tensor)
+            # Extract logits from model output (model returns a dictionary)
+            if isinstance(outputs, dict):
+                logits = outputs['logits']
+            elif isinstance(outputs, tuple):
+                logits = outputs[0]  # First element is usually logits
+            else:
+                logits = outputs
+            
+            # Ensure logits is a tensor
+            if not isinstance(logits, torch.Tensor):
+                logger.error(f"CNN output is not a tensor: {type(logits)}")
+                return 0.0
+            
+            loss = criterion(logits, targets_tensor)
            
            loss.backward()
            optimizer.step()
--- a/run_clean_dashboard.py
+++ b/run_clean_dashboard.py
@@ -1,201 +1,121 @@
 #!/usr/bin/env python3
 """
-Run Clean Trading Dashboard with Full Training Pipeline
-Integrated system with both training loop and clean web dashboard
+Clean Trading Dashboard Runner with Enhanced Stability and Error Handling
 """

-import os
-# Fix OpenMP library conflicts before importing other modules
-os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
-os.environ['OMP_NUM_THREADS'] = '4'
-
-import asyncio
-import logging
 import sys
-import threading
+import logging
+import traceback
+import gc
 import time
+import psutil
+import torch
 from pathlib import Path

-# Add project root to path
-project_root = Path(__file__).parent
-sys.path.insert(0, str(project_root))
-
-from core.config import get_config, setup_logging
-from core.data_provider import DataProvider
-
-# Import checkpoint management
-from utils.checkpoint_manager import get_checkpoint_manager
-from utils.training_integration import get_training_integration
-
 # Setup logging
-setup_logging()
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)

-async def start_training_pipeline(orchestrator, trading_executor):
-    """Start the training pipeline in the background"""
-    logger.info("=" * 70)
-    logger.info("STARTING TRAINING PIPELINE WITH CLEAN DASHBOARD")
-    logger.info("=" * 70)
+def clear_gpu_memory():
+    """Clear GPU memory cache"""
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()

-    # Initialize checkpoint management
-    checkpoint_manager = get_checkpoint_manager()
-    training_integration = get_training_integration()
+def check_system_resources():
+    """Check if system has enough resources"""
+    available_ram = psutil.virtual_memory().available / 1024**3
+    if available_ram < 2.0:  # Less than 2GB available
+        logger.warning(f"Low RAM: {available_ram:.1f} GB available")
+        gc.collect()
+        clear_gpu_memory()
+        return False
+    return True

-    # Training statistics
-    training_stats = {
-        'iteration_count': 0,
-        'total_decisions': 0,
-        'successful_trades': 0,
-        'best_performance': 0.0,
-        'last_checkpoint_iteration': 0
-    }
+def run_dashboard_with_recovery():
+    """Run dashboard with automatic error recovery"""
+    max_retries = 3
+    retry_count = 0
    
+    while retry_count < max_retries:
        try:
-        # Start real-time processing (available in Enhanced orchestrator)
-        if hasattr(orchestrator, 'start_realtime_processing'):
-            await orchestrator.start_realtime_processing()
-            logger.info("Real-time processing started")
+            logger.info(f"Starting Clean Trading Dashboard (attempt {retry_count + 1}/{max_retries})")
            
-        # Start COB integration (available in Enhanced orchestrator)
-        if hasattr(orchestrator, 'start_cob_integration'):
-            await orchestrator.start_cob_integration()
-            logger.info("COB integration started - 5-minute data matrix active")
-        else:
-            logger.info("COB integration not available")
+            # Check system resources
+            if not check_system_resources():
+                logger.warning("System resources low, waiting 30 seconds...")
+                time.sleep(30)
+                continue
            
-        # Main training loop
-        iteration = 0
-        last_checkpoint_time = time.time()
-        
-        while True:
-            try:
-                iteration += 1
-                training_stats['iteration_count'] = iteration
-                
-                # Get symbols to process
-                symbols = orchestrator.symbols if hasattr(orchestrator, 'symbols') else ['ETH/USDT']
-                
-                # Process each symbol
-                for symbol in symbols:
-                    try:
-                        # Make trading decision (this triggers model training)
-                        decision = await orchestrator.make_trading_decision(symbol)
-                        if decision:
-                            training_stats['total_decisions'] += 1
-                            logger.debug(f"[{symbol}] Decision: {decision.action} @ {decision.confidence:.1%}")
-                    
-                    except Exception as e:
-                        logger.warning(f"Error processing {symbol}: {e}")
-                
-                # Status logging every 100 iterations
-                if iteration % 100 == 0:
-                    current_time = time.time()
-                    elapsed = current_time - last_checkpoint_time
-                    
-                    logger.info(f"[TRAINING] Iteration {iteration}, Decisions: {training_stats['total_decisions']}, Time: {elapsed:.1f}s")
-                    
-                    # Models will save their own checkpoints when performance improves
-                    training_stats['last_checkpoint_iteration'] = iteration
-                    last_checkpoint_time = current_time
-                
-                # Brief pause to prevent overwhelming the system
-                await asyncio.sleep(0.1)  # 100ms between iterations
-                
-            except Exception as e:
-                logger.error(f"Training loop error: {e}")
-                await asyncio.sleep(5)  # Wait longer on error
-                
-    except Exception as e:
-        logger.error(f"Training pipeline error: {e}")
-        import traceback
-        logger.error(traceback.format_exc())
-
-def start_clean_dashboard_with_training():
-    """Start clean dashboard with full training pipeline"""
-    try:
-        logger.info("=" * 80)
-        logger.info("CLEAN TRADING DASHBOARD + FULL TRAINING PIPELINE")  
-        logger.info("=" * 80)
-        logger.info("Features: Real-time Training, COB Integration, Clean UI")
-        logger.info("Universal Data Stream: ENABLED")
-        logger.info("Neural Decision Fusion: ENABLED")
-        logger.info("COB Integration: ENABLED")
-        logger.info("GPU Training: ENABLED")
-        logger.info("Multi-symbol: ETH/USDT, BTC/USDT")
-        
-        # Get port from environment or use default
-        dashboard_port = int(os.environ.get('DASHBOARD_PORT', '8051'))
-        logger.info(f"Dashboard: http://127.0.0.1:{dashboard_port}")
-        logger.info("=" * 80)
-        
-        # Check environment variables
-        enable_universal_stream = os.environ.get('ENABLE_UNIVERSAL_DATA_STREAM', '1') == '1'
-        enable_nn_fusion = os.environ.get('ENABLE_NN_DECISION_FUSION', '1') == '1'
-        enable_cob = os.environ.get('ENABLE_COB_INTEGRATION', '1') == '1'
-        
-        logger.info(f"Universal Data Stream: {'ENABLED' if enable_universal_stream else 'DISABLED'}")
-        logger.info(f"Neural Decision Fusion: {'ENABLED' if enable_nn_fusion else 'DISABLED'}")
-        logger.info(f"COB Integration: {'ENABLED' if enable_cob else 'DISABLED'}")
-        
-        # Get configuration
-        config = get_config()
-        
-        # Initialize core components
+            # Import here to avoid memory issues on restart
            from core.data_provider import DataProvider
            from core.orchestrator import TradingOrchestrator
            from core.trading_executor import TradingExecutor
-        
-        # Create data provider
-        data_provider = DataProvider()
-        
-        # Create enhanced orchestrator with COB integration - stable and efficient
-        orchestrator = TradingOrchestrator(data_provider, enhanced_rl_training=True)
-        logger.info("Enhanced Trading Orchestrator created with COB integration")
-        
-        # Create trading executor
-        trading_executor = TradingExecutor()
-        
-        # Import clean dashboard
            from web.clean_dashboard import create_clean_dashboard
            
-        # Create clean dashboard
-        dashboard = create_clean_dashboard(
+            logger.info("Creating data provider...")
+            data_provider = DataProvider()
+            
+            logger.info("Creating trading orchestrator...")
+            orchestrator = TradingOrchestrator(
                data_provider=data_provider,
-            orchestrator=orchestrator,
-            trading_executor=trading_executor
+                enhanced_rl_training=True
            )
-        logger.info("Clean Trading Dashboard created")
            
-        # Start training pipeline in background thread
-        def training_worker():
-            """Run training pipeline in background"""
+            logger.info("Creating trading executor...")
+            trading_executor = TradingExecutor()
+            
+            logger.info("Creating clean dashboard...")
+            dashboard = create_clean_dashboard(data_provider, orchestrator, trading_executor)
+            
+            logger.info("Dashboard created successfully")
+            logger.info("=== Clean Trading Dashboard Status ===")
+            logger.info("- Data Provider: Active")
+            logger.info("- Trading Orchestrator: Active")
+            logger.info("- Trading Executor: Active")
+            logger.info("- Enhanced Training: Active")
+            logger.info("- Dashboard: Ready")
+            logger.info("=======================================")
+            
+            # Start the dashboard server with error handling
            try:
-                asyncio.run(start_training_pipeline(orchestrator, trading_executor))
-            except Exception as e:
-                logger.error(f"Training worker error: {e}")
-        
-        training_thread = threading.Thread(target=training_worker, daemon=True)
-        training_thread.start()
-        logger.info("Training pipeline started in background")
-        
-        # Wait a moment for training to initialize
-        time.sleep(3)
-        
-        # Start dashboard server (this blocks)
-        logger.info(" Starting Clean Dashboard Server...")
-        dashboard.run_server(host='127.0.0.1', port=dashboard_port, debug=False)
-        
+                logger.info("Starting dashboard server on http://127.0.0.1:8050")
+                dashboard.run_server(host='127.0.0.1', port=8050, debug=False)
            except KeyboardInterrupt:
-        logger.info("System stopped by user")
+                logger.info("Dashboard stopped by user")
+                break
            except Exception as e:
-        logger.error(f"Error running clean dashboard with training: {e}")
-        import traceback
-        traceback.print_exc()
+                logger.error(f"Dashboard server error: {e}")
+                logger.error(traceback.format_exc())
+                raise
+                
+        except Exception as e:
+            logger.error(f"Critical error in dashboard: {e}")
+            logger.error(traceback.format_exc())
+            
+            retry_count += 1
+            if retry_count < max_retries:
+                logger.info(f"Attempting recovery... ({retry_count}/{max_retries})")
+                
+                # Cleanup
+                gc.collect()
+                clear_gpu_memory()
+                
+                # Wait before retry
+                wait_time = 30 * retry_count  # Exponential backoff
+                logger.info(f"Waiting {wait_time} seconds before retry...")
+                time.sleep(wait_time)
+            else:
+                logger.error("Max retries reached. Exiting.")
                sys.exit(1)

-def main():
-    """Main function"""
-    start_clean_dashboard_with_training()
-
 if __name__ == "__main__":
-    main() 
+    try:
+        run_dashboard_with_recovery()
+    except KeyboardInterrupt:
+        logger.info("Application stopped by user")
+        sys.exit(0)
+    except Exception as e:
+        logger.error(f"Fatal error: {e}")
+        logger.error(traceback.format_exc())
+        sys.exit(1) 
--- a/web/clean_dashboard.py
+++ b/web/clean_dashboard.py
@@ -5478,15 +5478,18 @@ class CleanTradingDashboard:
                        import torch
                        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                        
+                        # Get the model's device to ensure tensors are on the same device
+                        model_device = next(model.parameters()).device
+                        
                        # Handle different input shapes for different CNN models
                        if hasattr(model, 'input_shape'):
                            # EnhancedCNN model
-                            features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
+                            features_tensor = torch.FloatTensor(features).unsqueeze(0).to(model_device)
                        else:
                            # Basic CNN model - reshape appropriately
-                            features_tensor = torch.FloatTensor(features).unsqueeze(0).unsqueeze(0).to(device)
+                            features_tensor = torch.FloatTensor(features).unsqueeze(0).unsqueeze(0).to(model_device)
                        
-                        target_tensor = torch.LongTensor([target]).to(device)
+                        target_tensor = torch.LongTensor([target]).to(model_device)
                        
                        # Set model to training mode and zero gradients
                        model.train()
@@ -5605,10 +5608,11 @@ class CleanTradingDashboard:
                    if hasattr(network, 'forward'):
                        import torch
                        import torch.nn as nn
-                        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-                        features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
-                        action_target_tensor = torch.LongTensor([action_target]).to(device)
-                        confidence_target_tensor = torch.FloatTensor([confidence_target]).to(device)
+                        # Get the model's device to ensure tensors are on the same device
+                        model_device = next(network.parameters()).device
+                        features_tensor = torch.FloatTensor(features).unsqueeze(0).to(model_device)
+                        action_target_tensor = torch.LongTensor([action_target]).to(model_device)
+                        confidence_target_tensor = torch.FloatTensor([confidence_target]).to(model_device)
                        
                        network.train()
                        network_output = network(features_tensor)