#!/usr/bin/env python3
"""
Test script for the massive 50M parameter DQN agent

Tests:
1. Model initialization and parameter count
2. Forward pass functionality
3. Gradient flow verification
4. Training step simulation
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import torch
import numpy as np
from NN.models.dqn_agent import DQNAgent, DQNNetwork
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_dqn_architecture():
    """Test the new massive DQN architecture"""
    print("🔥 Testing Massive DQN Architecture (Target: 50M parameters)")

    # Test the network directly first
    input_dim = 7850  # BaseDataInput feature size
    n_actions = 3  # BUY, SELL, HOLD

    print(f"\n1. Creating DQN Network with input_dim={input_dim}, n_actions={n_actions}")
    network = DQNNetwork(input_dim, n_actions)

    # Count parameters
    total_params = sum(p.numel() for p in network.parameters())
    print(f" ✅ Total parameters: {total_params:,}")
    print(f" 🎯 Target achieved: {total_params >= 50_000_000}")

    # Test forward pass
    print("\n2. Testing forward pass...")
    batch_size = 4
    test_input = torch.randn(batch_size, input_dim)

    with torch.no_grad():
        output = network(test_input)

    if isinstance(output, tuple):
        q_values, regime_pred, price_pred, volatility_pred, features, multi_timeframe_pred = output
        print(f" ✅ Q-values shape: {q_values.shape}")
        print(f" ✅ Regime prediction shape: {regime_pred.shape}")
        print(f" ✅ Price prediction shape: {price_pred.shape}")
        print(f" ✅ Volatility prediction shape: {volatility_pred.shape}")
        print(f" ✅ Features shape: {features.shape}")
        print(f" ✅ Multi-timeframe predictions shape: {multi_timeframe_pred.shape}")
    else:
        print(f" ✅ Output shape: {output.shape}")

    return network


def test_gradient_flow():
    """Test that gradients flow properly through the network"""
    print("\n🧪 Testing Gradient Flow...")

    # Create agent
    state_shape = (7850,)
    agent = DQNAgent(
        state_shape=state_shape,
        n_actions=3,
        learning_rate=0.001,
        batch_size=16,
        buffer_size=1000,
    )

    # Force disable mixed precision
    agent.use_mixed_precision = False
    print(f" ✅ Mixed precision disabled: {not agent.use_mixed_precision}")

    # Ensure model is in training mode
    agent.policy_net.train()
    print(f" ✅ Model in training mode: {agent.policy_net.training}")

    # Create test batch
    batch_size = 8
    state_dim = 7850
    states = torch.randn(batch_size, state_dim, requires_grad=True)
    actions = torch.randint(0, 3, (batch_size,))
    rewards = torch.randn(batch_size)
    next_states = torch.randn(batch_size, state_dim)
    dones = torch.zeros(batch_size)

    print(f" 📊 Test batch created - states: {states.shape}, actions: {actions.shape}")

    # Test forward pass and check gradients
    agent.optimizer.zero_grad()

    # Forward pass
    output = agent.policy_net(states)
    if isinstance(output, tuple):
        q_values = output[0]
    else:
        q_values = output

    print(f" ✅ Forward pass successful - Q-values: {q_values.shape}")
    print(f" ✅ Q-values require grad: {q_values.requires_grad}")

    # Gather Q-values for actions
    current_q_values = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
    print(f" ✅ Gathered Q-values require grad: {current_q_values.requires_grad}")

    # Compute simple loss
    target_q_values = rewards  # Simplified target
    loss = torch.nn.MSELoss()(current_q_values, target_q_values)
    print(f" ✅ Loss computed: {loss.item():.6f}")
    print(f" ✅ Loss requires grad: {loss.requires_grad}")

    # Backward pass
    loss.backward()

    # Check if gradients exist and are finite
    grad_norms = []
    params_with_grad = 0
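    # Every trainable parameter should have received a finite gradient from the
    # backward pass above; a missing (None) or non-finite gradient means the
    # computation graph is broken somewhere upstream.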
    total_params = 0
    for name, param in agent.policy_net.named_parameters():
        total_params += 1
        if param.grad is not None:
            params_with_grad += 1
            grad_norm = param.grad.norm().item()
            grad_norms.append(grad_norm)
            if not torch.isfinite(param.grad).all():
                print(f" ❌ Non-finite gradients in {name}")
                return False

    print(f" ✅ Parameters with gradients: {params_with_grad}/{total_params}")
    print(f" ✅ Average gradient norm: {np.mean(grad_norms):.6f}")
    print(f" ✅ Max gradient norm: {max(grad_norms):.6f}")

    # Test optimizer step
    agent.optimizer.step()
    print(" ✅ Optimizer step completed successfully")

    return True


def test_training_step():
    """Test a complete training step"""
    print("\n🏋️ Testing Complete Training Step...")

    # Create agent
    state_shape = (7850,)
    agent = DQNAgent(
        state_shape=state_shape,
        n_actions=3,
        learning_rate=0.001,
        batch_size=8,
        buffer_size=1000,
    )

    # Force disable mixed precision
    agent.use_mixed_precision = False

    # Add some experiences
    for i in range(20):
        state = np.random.randn(7850).astype(np.float32)
        action = np.random.randint(0, 3)
        reward = np.random.randn() * 0.1
        next_state = np.random.randn(7850).astype(np.float32)
        done = np.random.random() < 0.1
        agent.remember(state, action, reward, next_state, done)

    print(f" ✅ Added {len(agent.memory)} experiences to memory")

    # Test replay training
    if len(agent.memory) >= agent.batch_size:
        loss = agent.replay()
        print(f" ✅ Training completed with loss: {loss:.6f}")

        if loss > 0:
            print(" ✅ Training successful - non-zero loss indicates learning")
            return True
        else:
            print(" ❌ Training failed - zero loss indicates gradient issues")
            return False
    else:
        print(" ⚠️ Not enough experiences for training")
        return True


def main():
    """Run all tests"""
    print("🚀 MASSIVE DQN AGENT TESTING SUITE")
    print("=" * 50)

    # Test 1: Architecture
    try:
        network = test_dqn_architecture()
        print(" ✅ Architecture test PASSED")
    except Exception as e:
        print(f" ❌ Architecture test FAILED: {e}")
        return False

    # Test 2: Gradient flow
    try:
        gradient_success = test_gradient_flow()
        if gradient_success:
            print(" ✅ Gradient flow test PASSED")
        else:
            print(" ❌ Gradient flow test FAILED")
            return False
    except Exception as e:
        print(f" ❌ Gradient flow test FAILED: {e}")
        return False

    # Test 3: Training step
    try:
        training_success = test_training_step()
        if training_success:
            print(" ✅ Training step test PASSED")
        else:
            print(" ❌ Training step test FAILED")
            return False
    except Exception as e:
        print(f" ❌ Training step test FAILED: {e}")
        return False

    print("\n🎉 ALL TESTS PASSED!")
    print("✅ Massive DQN agent is ready for 50M parameter learning!")
    return True


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
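
# Note: test_gradient_flow() regresses the gathered Q-values toward the raw
# rewards as a deliberately simplified target, which is enough to exercise the
# backward pass. A full DQN update would instead use the Bellman target
#     y = r + gamma * (1 - done) * max_a' Q_target(s', a')
# which agent.replay() presumably computes internally.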