#!/usr/bin/env python3
"""
Test script for the massive 50M parameter DQN agent

Tests:
1. Model initialization and parameter count
2. Forward pass functionality
3. Gradient flow verification
4. Training step simulation
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import torch
import numpy as np
from NN.models.dqn_agent import DQNAgent, DQNNetwork
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_dqn_architecture():
    """Test the new massive DQN architecture"""
    print("🔥 Testing Massive DQN Architecture (Target: 50M parameters)")

    # Test the network directly first
    input_dim = 7850  # BaseDataInput feature size
    n_actions = 3  # BUY, SELL, HOLD

    print(f"\n1. Creating DQN Network with input_dim={input_dim}, n_actions={n_actions}")
    network = DQNNetwork(input_dim, n_actions)

    # Count parameters
    total_params = sum(p.numel() for p in network.parameters())
    print(f" ✅ Total parameters: {total_params:,}")
    print(f" 🎯 Target achieved: {total_params >= 50_000_000}")

    # Test forward pass
    print("\n2. Testing forward pass...")
    batch_size = 4
    test_input = torch.randn(batch_size, input_dim)

    with torch.no_grad():
        output = network(test_input)

    if isinstance(output, tuple):
        q_values, regime_pred, price_pred, volatility_pred, features, multi_timeframe_pred = output
        print(f" ✅ Q-values shape: {q_values.shape}")
        print(f" ✅ Regime prediction shape: {regime_pred.shape}")
        print(f" ✅ Price prediction shape: {price_pred.shape}")
        print(f" ✅ Volatility prediction shape: {volatility_pred.shape}")
        print(f" ✅ Features shape: {features.shape}")
        print(f" ✅ Multi-timeframe predictions shape: {multi_timeframe_pred.shape}")
    else:
        print(f" ✅ Output shape: {output.shape}")

    return network


def test_gradient_flow():
    """Test that gradients flow properly through the network"""
    print("\n🧪 Testing Gradient Flow...")

    # Create agent
    state_shape = (7850,)
    agent = DQNAgent(
        state_shape=state_shape,
        n_actions=3,
        learning_rate=0.001,
        batch_size=16,
        buffer_size=1000,
    )

    # Force disable mixed precision
    agent.use_mixed_precision = False
    print(f" ✅ Mixed precision disabled: {not agent.use_mixed_precision}")

    # Ensure model is in training mode
    agent.policy_net.train()
    print(f" ✅ Model in training mode: {agent.policy_net.training}")

    # Create test batch
    batch_size = 8
    state_dim = 7850
    states = torch.randn(batch_size, state_dim, requires_grad=True)
    actions = torch.randint(0, 3, (batch_size,))
    rewards = torch.randn(batch_size)
    next_states = torch.randn(batch_size, state_dim)
    dones = torch.zeros(batch_size)

    print(f" 📊 Test batch created - states: {states.shape}, actions: {actions.shape}")

    # Test forward pass and check gradients
    agent.optimizer.zero_grad()

    # Forward pass
    output = agent.policy_net(states)
    if isinstance(output, tuple):
        q_values = output[0]
    else:
        q_values = output

    print(f" ✅ Forward pass successful - Q-values: {q_values.shape}")
    print(f" ✅ Q-values require grad: {q_values.requires_grad}")

    # Gather Q-values for actions
    current_q_values = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
    print(f" ✅ Gathered Q-values require grad: {current_q_values.requires_grad}")

    # Compute simple loss
    target_q_values = rewards  # Simplified target
    loss = torch.nn.MSELoss()(current_q_values, target_q_values)
    print(f" ✅ Loss computed: {loss.item():.6f}")
    print(f" ✅ Loss requires grad: {loss.requires_grad}")

    # Backward pass
    loss.backward()

    # Check if gradients exist and are finite
    grad_norms = []
    params_with_grad = 0
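    # Every trainable parameter should have received a finite gradient from the
    # backward pass above; a missing (None) or non-finite gradient means the
    # computation graph is broken somewhere upstream.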
    total_params = 0
    for name, param in agent.policy_net.named_parameters():
        total_params += 1
        if param.grad is not None:
            params_with_grad += 1
            grad_norm = param.grad.norm().item()
            grad_norms.append(grad_norm)
            if not torch.isfinite(param.grad).all():
                print(f" ❌ Non-finite gradients in {name}")
                return False

    print(f" ✅ Parameters with gradients: {params_with_grad}/{total_params}")
    print(f" ✅ Average gradient norm: {np.mean(grad_norms):.6f}")
    print(f" ✅ Max gradient norm: {max(grad_norms):.6f}")

    # Test optimizer step
    agent.optimizer.step()
    print(" ✅ Optimizer step completed successfully")

    return True


def test_training_step():
    """Test a complete training step"""
    print("\n🏋️ Testing Complete Training Step...")

    # Create agent
    state_shape = (7850,)
    agent = DQNAgent(
        state_shape=state_shape,
        n_actions=3,
        learning_rate=0.001,
        batch_size=8,
        buffer_size=1000,
    )

    # Force disable mixed precision
    agent.use_mixed_precision = False

    # Add some experiences
    for i in range(20):
        state = np.random.randn(7850).astype(np.float32)
        action = np.random.randint(0, 3)
        reward = np.random.randn() * 0.1
        next_state = np.random.randn(7850).astype(np.float32)
        done = np.random.random() < 0.1
        agent.remember(state, action, reward, next_state, done)

    print(f" ✅ Added {len(agent.memory)} experiences to memory")

    # Test replay training
    if len(agent.memory) >= agent.batch_size:
        loss = agent.replay()
        print(f" ✅ Training completed with loss: {loss:.6f}")

        if loss > 0:
            print(" ✅ Training successful - non-zero loss indicates learning")
            return True
        else:
            print(" ❌ Training failed - zero loss indicates gradient issues")
            return False
    else:
        print(" ⚠️ Not enough experiences for training")
        return True


def main():
    """Run all tests"""
    print("🚀 MASSIVE DQN AGENT TESTING SUITE")
    print("=" * 50)

    # Test 1: Architecture
    try:
        network = test_dqn_architecture()
        print(" ✅ Architecture test PASSED")
    except Exception as e:
        print(f" ❌ Architecture test FAILED: {e}")
        return False

    # Test 2: Gradient flow
    try:
        gradient_success = test_gradient_flow()
        if gradient_success:
            print(" ✅ Gradient flow test PASSED")
        else:
            print(" ❌ Gradient flow test FAILED")
            return False
    except Exception as e:
        print(f" ❌ Gradient flow test FAILED: {e}")
        return False

    # Test 3: Training step
    try:
        training_success = test_training_step()
        if training_success:
            print(" ✅ Training step test PASSED")
        else:
            print(" ❌ Training step test FAILED")
            return False
    except Exception as e:
        print(f" ❌ Training step test FAILED: {e}")
        return False

    print("\n🎉 ALL TESTS PASSED!")
    print("✅ Massive DQN agent is ready for 50M parameter learning!")
    return True


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
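
# Note: test_gradient_flow() regresses the gathered Q-values toward the raw
# rewards as a deliberately simplified target, which is enough to exercise the
# backward pass. A full DQN update would instead use the Bellman target
#     y = r + gamma * (1 - done) * max_a' Q_target(s', a')
# which agent.replay() presumably computes internally.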