scalping dash also works initially
test_gpu_training.py (new file, 301 lines)
@@ -0,0 +1,301 @@
#!/usr/bin/env python3
"""
Test GPU Training - Check if our models actually train and use GPU
"""

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import time
import logging
from pathlib import Path
import sys

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def test_gpu_availability():
    """Test if GPU is available and working"""
    logger.info("=== GPU AVAILABILITY TEST ===")

    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")

    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU count: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
            print(f"  Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.1f} GB")

        # Test GPU operations
        try:
            device = torch.device('cuda:0')
            x = torch.randn(100, 100, device=device)
            y = torch.randn(100, 100, device=device)
            z = torch.mm(x, y)
            print(f"✅ GPU operations working: {z.device}")
            return True
        except Exception as e:
            print(f"❌ GPU operations failed: {e}")
            return False
    else:
        print("❌ No CUDA available")
        return False

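# --- Optional helper (sketch, not part of the original test flow) ----------
# A rough CPU-vs-GPU matmul timing comparison, assuming only the stock PyTorch
# API (torch.randn, torch.matmul, torch.cuda.synchronize). It is a quick sanity
# check that the GPU actually speeds up compute, not a rigorous benchmark.
def benchmark_matmul(size=2048, repeats=10):
    """Compare matmul wall time on CPU vs. GPU (if available)."""
    devices = ['cpu'] + (['cuda'] if torch.cuda.is_available() else [])
    for device_name in devices:
        device = torch.device(device_name)
        a = torch.randn(size, size, device=device)
        b = torch.randn(size, size, device=device)
        torch.matmul(a, b)  # warm-up so kernel launch overhead is not timed
        if device.type == 'cuda':
            torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(repeats):
            torch.matmul(a, b)
        if device.type == 'cuda':
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - start
        print(f"  {device_name}: {repeats} x {size}x{size} matmul in {elapsed:.3f}s")
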
def test_simple_training():
    """Test if a simple neural network actually trains"""
    logger.info("=== SIMPLE TRAINING TEST ===")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Create a simple model
    class SimpleNet(nn.Module):
        def __init__(self):
            super().__init__()
            self.layers = nn.Sequential(
                nn.Linear(10, 64),
                nn.ReLU(),
                nn.Linear(64, 32),
                nn.ReLU(),
                nn.Linear(32, 3)
            )

        def forward(self, x):
            return self.layers(x)

    model = SimpleNet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Generate some dummy data
    X = torch.randn(1000, 10, device=device)
    y = torch.randint(0, 3, (1000,), device=device)

    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    print(f"Data shape: {X.shape}, Labels shape: {y.shape}")

    # Training loop
    initial_loss = None
    losses = []

    print("Training for 100 steps...")
    start_time = time.time()

    for step in range(100):
        # Forward pass
        outputs = model(X)
        loss = criterion(outputs, y)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_val = loss.item()
        losses.append(loss_val)

        if step == 0:
            initial_loss = loss_val

        if step % 20 == 0:
            print(f"Step {step}: Loss = {loss_val:.4f}")

    end_time = time.time()
    final_loss = losses[-1]

    print(f"Training completed in {end_time - start_time:.2f} seconds")
    print(f"Initial loss: {initial_loss:.4f}")
    print(f"Final loss: {final_loss:.4f}")
    print(f"Loss reduction: {initial_loss - final_loss:.4f}")

    # Check if training actually happened
    if final_loss < initial_loss * 0.9:  # At least 10% reduction
        print("✅ Training is working - loss decreased significantly")
        return True
    else:
        print("❌ Training may not be working - loss didn't decrease much")
        return False

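# --- Optional helper (sketch, not in the original tests) -------------------
# Checks that a single optimizer step actually moves the parameters, which
# catches silent failures such as a detached graph or a zero learning rate.
# Uses only standard PyTorch calls; the helper name is illustrative, not an
# existing project API.
def parameters_changed(model, optimizer, loss_fn, x, y):
    """Return True if one training step changes at least one parameter."""
    before = [p.detach().clone() for p in model.parameters()]
    optimizer.zero_grad()
    loss_fn(model(x), y).backward()
    optimizer.step()
    return any(not torch.equal(b, p.detach())
               for b, p in zip(before, model.parameters()))
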
def test_our_models():
    """Test if our actual models can train"""
    logger.info("=== OUR MODELS TEST ===")

    try:
        # Test DQN Agent
        from NN.models.dqn_agent import DQNAgent

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Testing DQN Agent on {device}")

        # Create agent
        state_shape = (100,)  # Simple state
        agent = DQNAgent(
            state_shape=state_shape,
            n_actions=3,
            learning_rate=0.001,
            device=device
        )

        print("✅ DQN Agent created successfully")
        print(f"   Device: {agent.device}")
        print(f"   Policy net device: {next(agent.policy_net.parameters()).device}")

        # Build one sample transition
        state = np.random.randn(100).astype(np.float32)
        action = 1
        reward = 0.5
        next_state = np.random.randn(100).astype(np.float32)
        done = False

        # Add experience and train
        agent.remember(state, action, reward, next_state, done)

        # Add more experiences so there is enough for a batch
        for _ in range(200):
            s = np.random.randn(100).astype(np.float32)
            a = np.random.randint(0, 3)
            r = np.random.randn() * 0.1
            ns = np.random.randn(100).astype(np.float32)
            d = np.random.random() < 0.1
            agent.remember(s, a, r, ns, d)

        # Test training
        print("Testing training step...")
        initial_loss = None
        for i in range(10):
            loss = agent.replay()
            if loss and loss > 0:  # replay() may return 0/None before it can sample a batch
                if initial_loss is None:
                    initial_loss = loss
                print(f"   Step {i}: Loss = {loss:.4f}")

        if initial_loss is not None:
            print("✅ DQN training is working")
        else:
            print("❌ DQN training returned no loss")

        # Only report success if replay() actually produced a training loss
        return initial_loss is not None

    except Exception as e:
        print(f"❌ Error testing our models: {e}")
        import traceback
        traceback.print_exc()
        return False

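# --- Optional check (sketch) ------------------------------------------------
# Confirms that agent.replay() actually updates the policy network weights.
# It relies only on attributes already used above (agent.policy_net,
# agent.replay); if the DQNAgent API differs, adjust accordingly.
def dqn_weights_move(agent):
    """Return True if a replay step changes the policy network parameters."""
    before = [p.detach().clone() for p in agent.policy_net.parameters()]
    agent.replay()
    return any(not torch.equal(b, p.detach())
               for b, p in zip(before, agent.policy_net.parameters()))
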
def test_cnn_model():
    """Test CNN model training"""
    logger.info("=== CNN MODEL TEST ===")

    try:
        from NN.models.enhanced_cnn import EnhancedCNN

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Testing Enhanced CNN on {device}")

        # Create model
        state_dim = (3, 20, 26)  # 3 timeframes, 20 window, 26 features
        n_actions = 3

        model = EnhancedCNN(state_dim, n_actions).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()

        print("✅ Enhanced CNN created successfully")
        print(f"   Parameters: {sum(p.numel() for p in model.parameters()):,}")

        # Test forward pass
        batch_size = 32
        x = torch.randn(batch_size, 3, 20, 26, device=device)

        print("Testing forward pass...")
        outputs = model(x)

        if isinstance(outputs, tuple):
            action_probs, extrema_pred, price_pred, features, advanced_pred = outputs
            print("✅ Forward pass successful")
            print(f"   Action probs shape: {action_probs.shape}")
            print(f"   Features shape: {features.shape}")
        else:
            print(f"❌ Unexpected output format: {type(outputs)}")
            return False

        # Test training step
        y = torch.randint(0, 3, (batch_size,), device=device)

        print("Testing training step...")
        loss = criterion(action_probs, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"✅ CNN training step successful, loss: {loss.item():.4f}")
        return True

    except Exception as e:
        print(f"❌ Error testing CNN model: {e}")
        import traceback
        traceback.print_exc()
        return False

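# --- Optional helper (sketch) -----------------------------------------------
# Prints current CUDA memory usage so out-of-memory problems during the DQN or
# CNN tests are easier to spot. torch.cuda.memory_allocated/memory_reserved are
# standard PyTorch calls; the helper itself is not part of the project code.
def report_gpu_memory(tag=""):
    """Log allocated/reserved CUDA memory in GB, if CUDA is available."""
    if not torch.cuda.is_available():
        return
    allocated = torch.cuda.memory_allocated() / 1024**3
    reserved = torch.cuda.memory_reserved() / 1024**3
    print(f"  GPU memory {tag}: allocated {allocated:.2f} GB, reserved {reserved:.2f} GB")
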
def main():
    """Run all tests"""
    print("=" * 60)
    print("TESTING GPU TRAINING FUNCTIONALITY")
    print("=" * 60)

    results = {}

    # Test 1: GPU availability
    results['gpu'] = test_gpu_availability()
    print()

    # Test 2: Simple training
    results['simple_training'] = test_simple_training()
    print()

    # Test 3: Our DQN models
    results['dqn_models'] = test_our_models()
    print()

    # Test 4: CNN models
    results['cnn_models'] = test_cnn_model()
    print()

    # Summary
    print("=" * 60)
    print("TEST RESULTS SUMMARY")
    print("=" * 60)

    for test_name, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{test_name.upper()}: {status}")

    all_passed = all(results.values())

    if all_passed:
        print("\n🎉 ALL TESTS PASSED - Your training should work with GPU!")
    else:
        print("\n⚠️ SOME TESTS FAILED - Check the issues above")

        if not results['gpu']:
            print("  → GPU not available or not working")
        if not results['simple_training']:
            print("  → Basic training loop not working")
        if not results['dqn_models']:
            print("  → DQN models have issues")
        if not results['cnn_models']:
            print("  → CNN models have issues")

    return 0 if all_passed else 1

if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)