more features, but dead-end

Dobromir Popov 2025-03-17 00:29:50 +02:00
parent 2e901e18f2
commit d5c291d15c
11 changed files with 9283 additions and 4013 deletions

View File

@ -5,8 +5,8 @@
"name": "Train Bot",
"type": "python",
"request": "launch",
- "program": "main.py",
- "args": ["--mode", "train", "--episodes", "100"],
+ "program": "main_multiu_broken.py",
+ "args": ["--mode", "train", "--episodes", "10000"],
"console": "integratedTerminal",
"justMyCode": true
},
@ -14,7 +14,7 @@
"name": "Evaluate Bot",
"type": "python",
"request": "launch",
- "program": "main.py",
+ "program": "main_multiu_broken.py",
"args": ["--mode", "eval", "--episodes", "10"],
"console": "integratedTerminal",
"justMyCode": true
@ -23,7 +23,7 @@
"name": "Live Trading (Demo)",
"type": "python",
"request": "launch",
- "program": "main.py",
+ "program": "main_multiu_broken.py",
"args": ["--mode", "live", "--demo"],
"console": "integratedTerminal",
"justMyCode": true
@ -32,7 +32,7 @@
"name": "Live Trading (Real)",
"type": "python",
"request": "launch",
- "program": "main.py",
+ "program": "main_multiu_broken.py",
"args": ["--mode", "live"],
"console": "integratedTerminal",
"justMyCode": true
@ -41,7 +41,7 @@
"name": "Continuous Training",
"type": "python",
"request": "launch",
- "program": "main.py",
+ "program": "main_multiu_broken.py",
"args": ["--mode", "continuous", "--refresh-data"],
"console": "integratedTerminal",
"justMyCode": true

View File

@ -201,6 +201,28 @@ class EnhancedReplayBuffer:
self.gamma = gamma # Discount factor
self.n_step_buffer = []
self.max_priority = 1.0
def add(self, state, action, reward, next_state, done):
"""
Add a new experience to the buffer (simplified version of push for compatibility)
Args:
state: Current state
action: Action taken
reward: Reward received
next_state: Next state
done: Whether the episode is done
"""
# Store in replay buffer with max priority
if len(self.buffer) < self.capacity:
self.buffer.append(None)
self.buffer[self.position] = (state, action, reward, next_state, done)
# Set priority to max priority to ensure it gets sampled
self.priorities[self.position] = self.max_priority
# Move position pointer
self.position = (self.position + 1) % self.capacity
def push(self, state, action, reward, next_state, done):
# Store experience in n-step buffer
@ -278,15 +300,8 @@ class EnhancedReplayBuffer:
next_states.append(next_state)
dones.append(done)
- return (
- torch.stack(states),
- torch.tensor(actions),
- torch.tensor(rewards, dtype=torch.float32),
- torch.stack(next_states),
- torch.tensor(dones, dtype=torch.float32),
- indices,
- weights
- )
+ # Return only the states, actions, rewards, next_states, dones for compatibility with learn function
+ return states, actions, rewards, next_states, dones
def update_priorities(self, indices, td_errors):
for idx, td_error in zip(indices, td_errors):
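
A minimal usage sketch of the simplified add/sample interface above, assuming the buffer is constructed with the capacity passed positionally, as it is called elsewhere in this commit (EnhancedReplayBuffer(REPLAY_BUFFER_SIZE)); the state size and toy data are illustrative only.

```python
# Sketch only: assumes EnhancedReplayBuffer(capacity) plus the add/sample/__len__ methods shown above.
import numpy as np
from enhanced_models import EnhancedReplayBuffer

buffer = EnhancedReplayBuffer(10000)       # capacity passed positionally, as in enhanced_training.py
state = np.zeros(100, dtype=np.float32)    # 100-dim state, matching the DQN state_dim

for _ in range(64):
    action = np.random.randint(0, 3)       # 0: HOLD, 1: BUY, 2: SELL
    reward = float(np.random.randn())
    next_state = np.random.randn(100).astype(np.float32)
    buffer.add(state, action, reward, next_state, False)   # stored with max priority
    state = next_state

if len(buffer) >= 32:
    # sample() now returns plain sequences instead of stacked tensors plus indices/weights
    states, actions, rewards, next_states, dones = buffer.sample(32)
```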

View File

@ -2,6 +2,7 @@ import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.amp import GradScaler, autocast
import numpy as np
import matplotlib.pyplot as plt
@ -9,7 +10,7 @@ from datetime import datetime
from tensorboardX import SummaryWriter
# Import our enhanced models
- from enhanced_models import EnhancedPricePredictionModel, EnhancedDQN, EnhancedReplayBuffer, train_price_predictor, prepare_multi_timeframe_data
+ from enhanced_models import EnhancedPricePredictionModel, EnhancedDQN, EnhancedReplayBuffer
# Constants
TIMEFRAMES = ['1m', '15m', '1h']
@ -218,285 +219,453 @@ def load_checkpoint(price_model, dqn_model, optimizer, episode=None):
print("No checkpoint found, starting training from scratch") print("No checkpoint found, starting training from scratch")
return 0, [], [], [] return 0, [], [], []
def enhanced_train_agent(exchange, num_episodes=NUM_EPISODES, continuous=CONTINUOUS_MODE, start_episode=CONTINUOUS_START_EPISODE): def enhanced_train_agent(exchange, num_episodes=NUM_EPISODES, continuous=CONTINUOUS_MODE, start_episode=CONTINUOUS_START_EPISODE, verbose=False):
""" """
Train the enhanced trading agent using multi-timeframe data Train an enhanced trading agent with multi-timeframe models
Args: Args:
exchange: Exchange object to fetch data from exchange: Exchange simulator or real exchange
num_episodes: Number of episodes to train for num_episodes: Number of episodes to train for
continuous: Whether to continue training from a checkpoint continuous: Whether to continue training from a checkpoint
start_episode: Episode to start from if continuous training start_episode: Episode to start from for continuous training
verbose: Whether to enable verbose logging
""" """
print(f"Training on device: {DEVICE}")
# Set up TensorBoard # Set up TensorBoard
writer = setup_tensorboard() writer = setup_tensorboard()
# Initialize models # Initialize models
state_dim = 100 # Increased state dimension for multi-timeframe features state_dim = 100 # Increased state dimension for enhanced features
action_dim = 3 # Buy, Sell, Hold action_dim = 3 # 0: HOLD, 1: BUY, 2: SELL
# Initialize price prediction model with multi-timeframe support
price_model = EnhancedPricePredictionModel( price_model = EnhancedPricePredictionModel(
input_dim=2, # Price and volume input_dim=2, # Price and volume
hidden_dim=256, hidden_dim=256,
num_layers=3, num_layers=3,
output_dim=5, # Predict next 5 candles output_dim=5, # OHLCV prediction
num_timeframes=len(TIMEFRAMES) num_timeframes=len(TIMEFRAMES)
).to(DEVICE) ).to(DEVICE)
# Initialize DQN model with enhanced architecture
dqn_model = EnhancedDQN( dqn_model = EnhancedDQN(
state_dim=state_dim, state_dim=state_dim,
action_dim=action_dim, action_dim=action_dim,
hidden_dim=512 hidden_dim=512
).to(DEVICE) ).to(DEVICE)
target_dqn = EnhancedDQN( # Initialize target network
target_model = EnhancedDQN(
state_dim=state_dim, state_dim=state_dim,
action_dim=action_dim, action_dim=action_dim,
hidden_dim=512 hidden_dim=512
).to(DEVICE) ).to(DEVICE)
target_model.load_state_dict(dqn_model.state_dict())
# Copy initial weights to target network
target_dqn.load_state_dict(dqn_model.state_dict())
# Initialize optimizer # Initialize optimizer
optimizer = optim.Adam(list(price_model.parameters()) + list(dqn_model.parameters()), lr=LEARNING_RATE) optimizer = optim.Adam(dqn_model.parameters(), lr=LEARNING_RATE)
# Initialize replay buffer # Initialize replay buffer with prioritized experience replay
replay_buffer = EnhancedReplayBuffer( replay_buffer = EnhancedReplayBuffer(REPLAY_BUFFER_SIZE)
capacity=REPLAY_BUFFER_SIZE,
alpha=0.6,
beta=0.4,
beta_increment=0.001,
n_step=3,
gamma=GAMMA
)
# Initialize gradient scaler for mixed precision training # Initialize training metrics
scaler = GradScaler(enabled=(DEVICE.type == 'cuda'))
# Initialize tracking variables
rewards = [] rewards = []
profits = [] profits = []
win_rates = [] win_rates = []
best_reward = float('-inf') best_reward = float('-inf')
best_pnl = float('-inf') best_pnl = float('-inf')
best_winrate = float('-inf') best_winrate = 0
# Load checkpoint if continuous training # Initialize epsilon for exploration
epsilon = EPSILON_START
# Load checkpoint if continuing training
if continuous: if continuous:
start_episode, rewards, profits, win_rates = load_checkpoint( try:
price_model, dqn_model, optimizer, start_episode checkpoint_path = 'models/enhanced_trading_agent_latest.pt'
) if os.path.exists(checkpoint_path):
checkpoint = torch.load(checkpoint_path, map_location=DEVICE)
price_model.load_state_dict(checkpoint['price_model_state_dict'])
dqn_model.load_state_dict(checkpoint['dqn_model_state_dict'])
target_model.load_state_dict(checkpoint['dqn_model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
if 'rewards' in checkpoint:
rewards = checkpoint['rewards']
if 'profits' in checkpoint:
profits = checkpoint['profits']
if 'win_rates' in checkpoint:
win_rates = checkpoint['win_rates']
if 'best_reward' in checkpoint:
best_reward = checkpoint['best_reward']
if 'best_pnl' in checkpoint:
best_pnl = checkpoint['best_pnl']
if 'best_winrate' in checkpoint:
best_winrate = checkpoint['best_winrate']
print(f"Loaded checkpoint from {checkpoint_path}")
print(f"Continuing from episode {start_episode}")
# Decay epsilon based on start episode
for _ in range(start_episode):
epsilon = max(EPSILON_END, epsilon * EPSILON_DECAY)
else:
print(f"No checkpoint found at {checkpoint_path}, starting from scratch")
except Exception as e:
print(f"Error loading checkpoint: {e}")
print("Starting from scratch")
# Prepare multi-timeframe data for price prediction model training # Set models to training mode
data_loaders = prepare_multi_timeframe_data(exchange, TIMEFRAMES) price_model.train()
dqn_model.train()
# Initialize gradient scaler for mixed precision training
scaler = GradScaler()
print(f"Training on device: {DEVICE}")
# Pre-train price prediction model # Pre-train price prediction model
print("Pre-training price prediction model...") print("Pre-training price prediction model...")
train_price_predictor(price_model, data_loaders, optimizer, DEVICE, epochs=5) train_price_predictor(price_model, exchange, TIMEFRAMES, DEVICE, num_epochs=5, batch_size=32)
# Main training loop # Main training loop
epsilon = EPSILON_START for episode in range(start_episode, start_episode + num_episodes):
# Initialize state
for episode in range(start_episode, num_episodes):
print(f"Episode {episode+1}/{num_episodes}")
# Reset environment
state = initialize_state(exchange, TIMEFRAMES) state = initialize_state(exchange, TIMEFRAMES)
total_reward = 0
# Reset environment for new episode
exchange.reset()
# Track episode metrics
episode_reward = 0
episode_steps = 0
done = False
trades = [] trades = []
wins = 0 wins = 0
losses = 0 losses = 0
# Enable verbose logging for prediction validation if requested
if verbose:
# Set logging level to DEBUG for more detailed logs
import logging
logging.getLogger("trading_bot").setLevel(logging.DEBUG)
# Add a hook to log prediction validations
def log_prediction_validation(pred, actual, was_correct):
if was_correct:
print(f"CORRECT prediction: predicted={pred:.2f}, actual={actual:.2f}")
else:
print(f"INCORRECT prediction: predicted={pred:.2f}, actual={actual:.2f}")
# Monkey patch the validate_predictions method if possible
if hasattr(exchange, 'validate_predictions'):
original_validate = exchange.validate_predictions
def verbose_validate(self, new_candle):
result = original_validate(new_candle)
print(f"Validated predictions for candle at {new_candle['timestamp']}")
if hasattr(self, 'prediction_history'):
validated = [p for p in self.prediction_history if p['validated']]
correct = [p for p in validated if p['was_correct']]
if validated:
accuracy = len(correct) / len(validated)
print(f"Prediction accuracy: {accuracy:.2f} ({len(correct)}/{len(validated)})")
return result
exchange.validate_predictions = verbose_validate.__get__(exchange, exchange.__class__)
# Episode loop # Episode loop
for step in range(MAX_STEPS_PER_EPISODE): while not done and episode_steps < MAX_STEPS_PER_EPISODE:
# Epsilon-greedy action selection # Epsilon-greedy action selection
if np.random.random() < epsilon: if np.random.random() < epsilon:
action = np.random.randint(0, action_dim) action = np.random.randint(0, action_dim)
else: else:
with torch.no_grad(): with torch.no_grad():
state_tensor = torch.FloatTensor(state).unsqueeze(0).to(DEVICE) q_values = dqn_model(torch.FloatTensor(state).unsqueeze(0).to(DEVICE))
q_values, _, _ = dqn_model(state_tensor)
action = q_values.argmax().item() action = q_values.argmax().item()
# Execute action and get next state and reward # Take action in environment
next_state, reward, done, trade_info = step_environment( next_state, reward, done, info = exchange.step(action)
exchange, state, action, price_model, TIMEFRAMES, DEVICE
)
# Store transition in replay buffer # Store transition in replay buffer
replay_buffer.push( replay_buffer.add(state, action, reward, next_state, done)
torch.FloatTensor(state),
action,
reward,
torch.FloatTensor(next_state),
done
)
# Update state and accumulate reward # Update state and accumulate reward
state = next_state state = next_state
total_reward += reward episode_reward += reward
# Track trade outcomes # Track trade outcomes
if trade_info is not None: if 'trade' in info and info['trade']:
trades.append(trade_info) trades.append(info['trade'])
if trade_info['pnl'] > 0: if 'pnl_dollar' in info['trade']:
wins += 1 if info['trade']['pnl_dollar'] > 0:
elif trade_info['pnl'] < 0: wins += 1
losses += 1 elif info['trade']['pnl_dollar'] < 0:
losses += 1
# Log trade to TensorBoard
if writer:
writer.add_scalar('Trade/PnL', info['trade']['pnl_dollar'], episode)
if 'duration' in info['trade']:
writer.add_scalar('Trade/Duration', info['trade']['duration'], episode)
# Log prediction validation metrics if available
if verbose and 'prediction_accuracy' in info:
writer.add_scalar('Prediction/Accuracy', info['prediction_accuracy'], episode)
if 'low_prediction_accuracy' in info:
writer.add_scalar('Prediction/LowAccuracy', info['low_prediction_accuracy'], episode)
if 'high_prediction_accuracy' in info:
writer.add_scalar('Prediction/HighAccuracy', info['high_prediction_accuracy'], episode)
# Learn from experiences if enough samples # Learn from experiences if enough samples
if len(replay_buffer) > BATCH_SIZE: if len(replay_buffer) > BATCH_SIZE:
learn(dqn_model, target_dqn, replay_buffer, optimizer, scaler, DEVICE) learn(dqn_model, target_model, replay_buffer, optimizer, scaler, DEVICE)
if done: episode_steps += 1
break
# Log verbose information about the current state if requested
if verbose and episode_steps % 10 == 0:
print(f"Episode {episode+1}, Step {episode_steps}, Action: {['HOLD', 'BUY', 'SELL'][action]}, Reward: {reward:.4f}")
if hasattr(exchange, 'position') and exchange.position != 'FLAT':
print(f" Position: {exchange.position}, Entry Price: {exchange.entry_price:.2f}, Current PnL: {exchange.calculate_pnl():.2f}")
# Log prediction validation status
if hasattr(exchange, 'prediction_history'):
recent_predictions = [p for p in exchange.prediction_history if p['validated']][-5:]
if recent_predictions:
print(" Recent prediction validations:")
for pred in recent_predictions:
status = "" if pred['was_correct'] else ""
print(f" {status} {pred['type']} prediction: {pred['predicted_value']:.2f} vs {pred['actual_value']:.2f}")
# Update target network # Update target network
if episode % TARGET_UPDATE == 0: if episode % TARGET_UPDATE == 0:
target_dqn.load_state_dict(dqn_model.state_dict()) target_model.load_state_dict(dqn_model.state_dict())
# Calculate episode metrics # Calculate episode metrics
avg_reward = total_reward / (step + 1) avg_reward = episode_reward / max(1, episode_steps)
total_pnl = sum(trade['pnl'] for trade in trades) if trades else 0 total_pnl = sum(trade.get('pnl_dollar', 0) for trade in trades) if trades else 0
win_rate = (wins / (wins + losses) * 100) if (wins + losses) > 0 else 0 win_rate = (wins / (wins + losses) * 100) if (wins + losses) > 0 else 0
# Decay epsilon # Store metrics
epsilon = max(EPSILON_END, epsilon * EPSILON_DECAY)
# Track metrics
rewards.append(avg_reward) rewards.append(avg_reward)
profits.append(total_pnl) profits.append(total_pnl)
win_rates.append(win_rate) win_rates.append(win_rate)
# Decay epsilon
epsilon = max(EPSILON_END, epsilon * EPSILON_DECAY)
# Log episode metrics
print(f"Episode {episode+1}: Reward={avg_reward:.4f}, PnL=${total_pnl:.2f}, Win Rate={win_rate:.1f}%, Epsilon={epsilon:.4f}")
# Log to TensorBoard # Log to TensorBoard
writer.add_scalar('Training/Reward', avg_reward, episode) if writer:
writer.add_scalar('Training/Profit', total_pnl, episode) writer.add_scalar('Metrics/Reward', avg_reward, episode)
writer.add_scalar('Training/WinRate', win_rate, episode) writer.add_scalar('Metrics/PnL', total_pnl, episode)
writer.add_scalar('Training/Epsilon', epsilon, episode) writer.add_scalar('Metrics/WinRate', win_rate, episode)
writer.add_scalar('Metrics/Epsilon', epsilon, episode)
# Log prediction validation metrics if available
if hasattr(exchange, 'prediction_history'):
validated_predictions = [p for p in exchange.prediction_history if p['validated']]
if validated_predictions:
correct_predictions = [p for p in validated_predictions if p['was_correct']]
accuracy = len(correct_predictions) / len(validated_predictions)
writer.add_scalar('Prediction/OverallAccuracy', accuracy, episode)
# Log separate accuracies for low and high predictions
low_predictions = [p for p in validated_predictions if p['type'] == 'low']
high_predictions = [p for p in validated_predictions if p['type'] == 'high']
if low_predictions:
correct_lows = [p for p in low_predictions if p['was_correct']]
low_accuracy = len(correct_lows) / len(low_predictions)
writer.add_scalar('Prediction/LowAccuracy', low_accuracy, episode)
if high_predictions:
correct_highs = [p for p in high_predictions if p['was_correct']]
high_accuracy = len(correct_highs) / len(high_predictions)
writer.add_scalar('Prediction/HighAccuracy', high_accuracy, episode)
if verbose:
print(f"Prediction accuracy: {accuracy:.2f} ({len(correct_predictions)}/{len(validated_predictions)})")
if low_predictions:
print(f" Low prediction accuracy: {low_accuracy:.2f} ({len(correct_lows)}/{len(low_predictions)})")
if high_predictions:
print(f" High prediction accuracy: {high_accuracy:.2f} ({len(correct_highs)}/{len(high_predictions)})")
# Print episode summary # Save best models
print(f"Episode {episode+1} - Avg Reward: {avg_reward:.2f}, PnL: ${total_pnl:.2f}, Win Rate: {win_rate:.1f}%") if avg_reward > best_reward:
best_reward = avg_reward
torch.save({
'price_model_state_dict': price_model.state_dict(),
'dqn_model_state_dict': dqn_model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'episode': episode,
'reward': best_reward,
'pnl': total_pnl,
'win_rate': win_rate
}, 'models/enhanced_trading_agent_best_reward.pt')
print(f"Saved best reward model: {best_reward:.4f}")
# Save models and plot results if total_pnl > best_pnl:
if episode % SAVE_INTERVAL == 0 or episode == num_episodes - 1: best_pnl = total_pnl
best_reward, best_pnl, best_winrate = save_models( torch.save({
price_model, dqn_model, optimizer, episode, 'price_model_state_dict': price_model.state_dict(),
rewards, profits, win_rates, 'dqn_model_state_dict': dqn_model.state_dict(),
best_reward, best_pnl, best_winrate 'optimizer_state_dict': optimizer.state_dict(),
) 'episode': episode,
plot_training_results(rewards, profits, win_rates, episode) 'reward': avg_reward,
'pnl': best_pnl,
'win_rate': win_rate
}, 'models/enhanced_trading_agent_best_pnl.pt')
print(f"Saved best PnL model: ${best_pnl:.2f}")
if win_rate > best_winrate:
best_winrate = win_rate
torch.save({
'price_model_state_dict': price_model.state_dict(),
'dqn_model_state_dict': dqn_model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'episode': episode,
'reward': avg_reward,
'pnl': total_pnl,
'win_rate': best_winrate
}, 'models/enhanced_trading_agent_best_winrate.pt')
print(f"Saved best win rate model: {best_winrate:.1f}%")
# Save latest model for continuous training
torch.save({
'price_model_state_dict': price_model.state_dict(),
'dqn_model_state_dict': dqn_model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'episode': episode,
'rewards': rewards,
'profits': profits,
'win_rates': win_rates,
'best_reward': best_reward,
'best_pnl': best_pnl,
'best_winrate': best_winrate
}, 'models/enhanced_trading_agent_latest.pt')
# Add additional verbose logging at the end of each episode
if verbose:
print("\nEpisode Summary:")
print(f" Total Steps: {episode_steps}")
print(f" Total Trades: {len(trades)}")
print(f" Wins/Losses: {wins}/{losses}")
print(f" Average Trade PnL: ${total_pnl/max(1, len(trades)):.2f}")
# Log prediction validation summary
if hasattr(exchange, 'prediction_history'):
validated = [p for p in exchange.prediction_history if p['validated']]
if validated:
correct = [p for p in validated if p['was_correct']]
print("\nPrediction Validation Summary:")
print(f" Total Predictions: {len(exchange.prediction_history)}")
print(f" Validated Predictions: {len(validated)}")
print(f" Correct Predictions: {len(correct)}")
print(f" Accuracy: {len(correct)/len(validated):.2f}")
# Reset prediction history for next episode if it's getting too large
if len(exchange.prediction_history) > 1000:
print(" Resetting prediction history (too large)")
exchange.prediction_history = exchange.prediction_history[-100:]
# Close TensorBoard writer # Return final metrics
writer.close() return rewards, profits, win_rates
# Final save and plot
best_reward, best_pnl, best_winrate = save_models(
price_model, dqn_model, optimizer, num_episodes - 1,
rewards, profits, win_rates,
best_reward, best_pnl, best_winrate
)
plot_training_results(rewards, profits, win_rates, num_episodes - 1)
print("Training complete!")
return price_model, dqn_model
def learn(dqn, target_dqn, replay_buffer, optimizer, scaler, device): def learn(dqn, target_dqn, replay_buffer, optimizer, scaler, device):
"""Update the DQN model using experiences from the replay buffer""" """
# Sample from replay buffer Update the DQN model using experiences from the replay buffer
states, actions, rewards, next_states, dones, indices, weights = replay_buffer.sample(BATCH_SIZE)
# Move to device Args:
states = states.to(device) dqn: The DQN model to update
actions = actions.to(device) target_dqn: The target DQN model for stable Q-value estimates
rewards = rewards.to(device) replay_buffer: Replay buffer containing experiences
next_states = next_states.to(device) optimizer: Optimizer for updating the DQN model
dones = dones.to(device) scaler: Gradient scaler for mixed precision training
weights = weights.to(device) device: Device to train on (CPU or GPU)
"""
if len(replay_buffer) < BATCH_SIZE:
return
# Sample batch from replay buffer
states, actions, rewards, next_states, dones = replay_buffer.sample(BATCH_SIZE)
# Convert to tensors
states = torch.FloatTensor(states).to(device)
actions = torch.LongTensor(actions).to(device)
rewards = torch.FloatTensor(rewards).to(device)
next_states = torch.FloatTensor(next_states).to(device)
dones = torch.FloatTensor(dones).to(device)
# Get current Q values # Get current Q values
if device.type == 'cuda': with autocast(device_type='cuda' if device.type == 'cuda' else 'cpu'):
with autocast(device_type='cuda', enabled=True): q_values = dqn(states)
current_q_values, _, _ = dqn(states) if isinstance(q_values, tuple):
current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1) q_values = q_values[0] # Extract the tensor from the tuple
q_values = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
# Compute target Q values
with torch.no_grad():
next_q_values, _, _ = target_dqn(next_states)
max_next_q_values = next_q_values.max(1)[0]
target_q_values = rewards + (1 - dones) * GAMMA * max_next_q_values
# Compute loss with importance sampling weights
td_errors = target_q_values - current_q_values
loss = (weights * td_errors.pow(2)).mean()
else:
# CPU version without autocast
current_q_values, _, _ = dqn(states)
current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
# Compute target Q values # Get next Q values from target network
with torch.no_grad(): with torch.no_grad():
next_q_values, _, _ = target_dqn(next_states) next_q_values = target_dqn(next_states)
max_next_q_values = next_q_values.max(1)[0] if isinstance(next_q_values, tuple):
target_q_values = rewards + (1 - dones) * GAMMA * max_next_q_values next_q_values = next_q_values[0] # Extract the tensor from the tuple
next_q_values = next_q_values.max(1)[0]
target_q_values = rewards + GAMMA * next_q_values * (1 - dones)
# Compute loss with importance sampling weights # Calculate loss
td_errors = target_q_values - current_q_values loss = F.smooth_l1_loss(q_values, target_q_values)
loss = (weights * td_errors.pow(2)).mean()
# Update priorities in replay buffer # Optimize the model
replay_buffer.update_priorities(indices, td_errors.abs().detach().cpu().numpy())
# Optimize the model with mixed precision
optimizer.zero_grad() optimizer.zero_grad()
scaler.scale(loss).backward()
if device.type == 'cuda': scaler.step(optimizer)
scaler.scale(loss).backward() scaler.update()
scaler.unscale_(optimizer)
torch.nn.utils.clip_grad_norm_(dqn.parameters(), max_norm=1.0)
scaler.step(optimizer)
scaler.update()
else:
# CPU version without scaler
loss.backward()
torch.nn.utils.clip_grad_norm_(dqn.parameters(), max_norm=1.0)
optimizer.step()
def initialize_state(exchange, timeframes): def initialize_state(exchange, timeframes):
"""Initialize the state with data from multiple timeframes""" """
Initialize the state for the trading agent using multi-timeframe data
Args:
exchange: Exchange object to fetch data from
timeframes: List of timeframes to use
Returns:
state: Initial state vector
"""
# Initialize empty state
state = []
# Fetch data for each timeframe # Fetch data for each timeframe
timeframe_data = {} timeframe_data = {}
for tf in timeframes: for tf in timeframes:
candles = exchange.fetch_ohlcv(timeframe=tf, limit=30) candles = exchange.fetch_ohlcv(timeframe=tf, limit=30)
timeframe_data[tf] = candles if not candles or len(candles) < 10:
print(f"Not enough data for timeframe {tf}, using zeros")
# Extract features from each timeframe # Use zeros if not enough data
state = [] state.extend([0] * 20) # 20 features per timeframe
continue
for tf in timeframes:
candles = timeframe_data[tf]
# Price features timeframe_data[tf] = candles
# Extract features for this timeframe
prices = [candle[4] for candle in candles[-10:]] # Last 10 close prices prices = [candle[4] for candle in candles[-10:]] # Last 10 close prices
price_changes = [prices[i]/prices[i-1] - 1 for i in range(1, len(prices))] price_changes = [prices[i]/prices[i-1] - 1 for i in range(1, len(prices))]
# Volume features
volumes = [candle[5] for candle in candles[-10:]] # Last 10 volumes volumes = [candle[5] for candle in candles[-10:]] # Last 10 volumes
volume_changes = [volumes[i]/volumes[i-1] - 1 for i in range(1, len(volumes))] volume_changes = [volumes[i]/volumes[i-1] - 1 for i in range(1, len(volumes))]
# Technical indicators # Technical indicators
# Simple Moving Averages
sma_5 = sum(prices[-5:]) / 5 sma_5 = sum(prices[-5:]) / 5
sma_10 = sum(prices) / 10 sma_10 = sum(prices) / 10
# Relative Strength Index (simplified) # RSI (simplified)
gains = [max(0, price_changes[i]) for i in range(len(price_changes))] gains = [max(0, pc) for pc in price_changes]
losses = [max(0, -price_changes[i]) for i in range(len(price_changes))] losses = [max(0, -pc) for pc in price_changes]
avg_gain = sum(gains) / len(gains) avg_gain = sum(gains) / len(gains) if gains else 0
avg_loss = sum(losses) / len(losses) avg_loss = sum(losses) / len(losses) if losses else 1e-10
rs = avg_gain / (avg_loss + 1e-10) # Avoid division by zero rs = avg_gain / avg_loss
rsi = 100 - (100 / (1 + rs)) rsi = 100 - (100 / (1 + rs))
# Add features to state # Add features to state
@ -506,85 +675,15 @@ def initialize_state(exchange, timeframes):
state.append(sma_10 / prices[-1] - 1) # 1 feature state.append(sma_10 / prices[-1] - 1) # 1 feature
state.append(rsi / 100) # 1 feature state.append(rsi / 100) # 1 feature
# Add market regime features # Add market regime features (placeholder)
# This is a placeholder - in a real implementation, you would use the market_regime_classifier state.extend([0, 0, 0]) # 3 features for market regime
# from the DQN model to predict the current market regime
state.extend([0, 0, 0]) # 3 features for market regime (one-hot encoded)
# Add additional features to reach the expected dimension of 100 # Pad state to reach expected dimension of 100
# Calculate more technical indicators if len(state) < 100:
for tf in timeframes: state.extend([0] * (100 - len(state)))
candles = timeframe_data[tf] elif len(state) > 100:
prices = [candle[4] for candle in candles[-20:]] # Last 20 close prices
# Bollinger Bands
window = 20
if len(prices) >= window:
sma_20 = sum(prices[-window:]) / window
std_dev = (sum((price - sma_20) ** 2 for price in prices[-window:]) / window) ** 0.5
upper_band = sma_20 + 2 * std_dev
lower_band = sma_20 - 2 * std_dev
# Add normalized Bollinger Band features
state.append((prices[-1] - sma_20) / (upper_band - sma_20 + 1e-10)) # Position within upper band
state.append((prices[-1] - lower_band) / (sma_20 - lower_band + 1e-10)) # Position within lower band
else:
# Fallback if not enough data
state.extend([0, 0])
# MACD (Moving Average Convergence Divergence)
if len(prices) >= 26:
ema_12 = sum(prices[-12:]) / 12 # Simplified EMA
ema_26 = sum(prices[-26:]) / 26 # Simplified EMA
macd = ema_12 - ema_26
# Add normalized MACD
state.append(macd / prices[-1])
else:
# Fallback if not enough data
state.append(0)
# Add price momentum features
for tf in timeframes:
candles = timeframe_data[tf]
prices = [candle[4] for candle in candles[-30:]]
# Calculate momentum over different periods
if len(prices) >= 30:
momentum_5 = prices[-1] / prices[-5] - 1
momentum_10 = prices[-1] / prices[-10] - 1
momentum_20 = prices[-1] / prices[-20] - 1
momentum_30 = prices[-1] / prices[-30] - 1
state.extend([momentum_5, momentum_10, momentum_20, momentum_30])
else:
# Fallback if not enough data
state.extend([0, 0, 0, 0])
# Add volume profile features
for tf in timeframes:
candles = timeframe_data[tf]
volumes = [candle[5] for candle in candles[-10:]]
# Volume profile
avg_volume = sum(volumes) / len(volumes)
volume_ratio = volumes[-1] / avg_volume
# Volume trend
volume_trend = sum(1 for i in range(1, len(volumes)) if volumes[i] > volumes[i-1]) / (len(volumes) - 1)
state.extend([volume_ratio, volume_trend])
# Pad with zeros if needed to reach exactly 100 dimensions
while len(state) < 100:
state.append(0)
# Ensure state has exactly 100 dimensions
if len(state) > 100:
state = state[:100] state = state[:100]
assert len(state) == 100, f"State dimension mismatch: {len(state)} != 100"
return state return state
def step_environment(exchange, state, action, price_model, timeframes, device):
@ -744,6 +843,145 @@ def step_environment(exchange, state, action, price_model, timeframes, device):
return next_state, reward, done, trade_info
def train_price_predictor(model, exchange, timeframes, device, num_epochs=5, batch_size=32):
"""
Train the price prediction model using data from the exchange
Args:
model: The EnhancedPricePredictionModel
exchange: Exchange object to fetch data from
timeframes: List of timeframes to use
device: Device to train on (CPU or GPU)
num_epochs: Number of training epochs
batch_size: Batch size for training
"""
print(f"Training price prediction model for {num_epochs} epochs with batch size {batch_size}")
# Create optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Set model to training mode
model.train()
# Fetch data for each timeframe
timeframe_data = {}
for tf in timeframes:
# Fetch more data for training
candles = exchange.fetch_ohlcv(timeframe=tf, limit=500)
if not candles or len(candles) < 30:
print(f"Not enough data for timeframe {tf}, skipping training")
return model
timeframe_data[tf] = candles
# Prepare training data
for epoch in range(num_epochs):
total_loss = 0
num_batches = 0
# Create batches
for i in range(0, len(timeframe_data[timeframes[0]]) - 30, batch_size):
if i + 30 + 5 >= len(timeframe_data[timeframes[0]]):
break
# Prepare inputs for each timeframe
inputs_list = []
for tf in timeframes:
if i + 30 >= len(timeframe_data[tf]):
continue
# Extract price and volume data for input
input_data = []
for j in range(i, i + 30):
if j < len(timeframe_data[tf]):
candle = timeframe_data[tf][j]
# Use close price and volume
input_data.append([candle[4], candle[5]])
# Convert to tensor and add batch dimension
input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(device)
inputs_list.append(input_tensor)
# Skip if we don't have data for all timeframes
if len(inputs_list) != len(timeframes):
continue
# Prepare targets (next 5 candles)
target_data = []
for j in range(i + 30, i + 35):
if j < len(timeframe_data[timeframes[0]]):
candle = timeframe_data[timeframes[0]][j]
# OHLCV values
target_data.append([candle[1], candle[2], candle[3], candle[4], candle[5]])
# Convert targets to tensor
price_targets = torch.tensor(target_data, dtype=torch.float32).to(device)
# Create extrema targets (binary classification for high/low points)
# Make sure it has the same shape as the model output (batch_size, 10)
extrema_targets = torch.zeros(1, 10, dtype=torch.float32).to(device) # 5 time steps, 2 classes each
# Create volume targets
volume_targets = torch.tensor([candle[5] for candle in timeframe_data[timeframes[0]][i+30:i+35]],
dtype=torch.float32).to(device)
# Zero gradients
optimizer.zero_grad()
try:
# Forward pass
price_pred, extrema_logits, volume_pred = model(inputs_list)
# Ensure targets have the same shape as predictions
if price_pred.shape != price_targets.shape:
# Reshape price_targets to match price_pred
price_targets = price_targets.view(price_pred.shape)
if volume_pred.shape != volume_targets.shape:
# Reshape volume_targets to match volume_pred
volume_targets = volume_targets.view(volume_pred.shape)
# Calculate losses
price_loss = F.mse_loss(price_pred, price_targets)
extrema_loss = F.binary_cross_entropy_with_logits(extrema_logits, extrema_targets)
volume_loss = F.mse_loss(volume_pred, volume_targets)
# Combined loss with weighting
loss = price_loss + 0.5 * extrema_loss + 0.3 * volume_loss
# Backward pass
loss.backward()
# Gradient clipping to prevent exploding gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
total_loss += loss.item()
num_batches += 1
if num_batches % 10 == 0:
print(f"Epoch {epoch+1}/{num_epochs}, Batch {num_batches}, Loss: {loss.item():.6f}")
except Exception as e:
print(f"Error in batch: {e}")
print(f"Shapes - price_pred: {price_pred.shape}, price_targets: {price_targets.shape}")
print(f"Shapes - extrema_logits: {extrema_logits.shape}, extrema_targets: {extrema_targets.shape}")
print(f"Shapes - volume_pred: {volume_pred.shape}, volume_targets: {volume_targets.shape}")
continue
if num_batches > 0:
avg_loss = total_loss / num_batches
print(f"Epoch {epoch+1}/{num_epochs}, Avg Loss: {avg_loss:.6f}")
else:
print(f"Epoch {epoch+1}/{num_epochs}, No batches processed")
# Learning rate scheduling
if epoch > 0 and epoch % 2 == 0:
for param_group in optimizer.param_groups:
param_group['lr'] *= 0.9
print("Price prediction model training completed")
return model
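
As a rough illustration of the sliding-window slicing used in train_price_predictor above, a 30-candle window of close price and volume forms the input and the OHLCV of the following 5 candles forms the target; the candles below are fabricated and only the indexing mirrors the code.

```python
import numpy as np

# Fabricated OHLCV candles: [timestamp, open, high, low, close, volume]
candles = [[t, 100.0, 101.0, 99.0, 100.0 + 0.1 * t, 1000.0 + t] for t in range(100)]

window, horizon = 30, 5
i = 0  # start index of one training sample

# Input: [close, volume] for 30 consecutive candles -> shape (30, 2)
inputs = np.array([[c[4], c[5]] for c in candles[i:i + window]], dtype=np.float32)

# Target: OHLCV of the next 5 candles -> shape (5, 5)
targets = np.array([[c[1], c[2], c[3], c[4], c[5]]
                    for c in candles[i + window:i + window + horizon]], dtype=np.float32)

print(inputs.shape, targets.shape)  # (30, 2) (5, 5)
```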
# Main function to run training
def main():
from exchange_simulator import ExchangeSimulator
@ -752,7 +990,7 @@ def main():
exchange = ExchangeSimulator()
# Train agent
- price_model, dqn_model = enhanced_train_agent(
+ rewards, profits, win_rates = enhanced_train_agent(
exchange=exchange,
num_episodes=NUM_EPISODES,
continuous=CONTINUOUS_MODE,

View File

@ -344,6 +344,439 @@ class ExchangeSimulator:
'BTC': 0.5
}
}
def reset(self):
"""
Reset the exchange simulator to its initial state
Returns:
Self for method chaining
"""
# Reset timestamp
self.current_timestamp = datetime.now()
# Regenerate data for each timeframe
for tf in self.timeframes:
self._generate_initial_data(tf)
# Reset any internal state
self.position = 'flat'
self.position_size = 0
self.entry_price = 0
self.stop_loss = 0
self.take_profit = 0
# Reset prediction history if it exists
if hasattr(self, 'prediction_history'):
self.prediction_history = []
return self
def step(self, action):
"""
Take a step in the environment by executing an action
Args:
action: Action to take (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT)
Returns:
next_state: Next state after taking action
reward: Reward received
done: Whether episode is done
info: Additional information
"""
# Get current price
current_price = self.data['1m'][-1][4]
# Initialize info dictionary
info = {
'price': current_price,
'timestamp': self.data['1m'][-1][0],
'trade': None
}
# Process action
if action == 0: # HOLD
pass # No action needed
elif action == 1: # BUY/LONG
if self.position == 'flat':
# Open a new long position
self.position = 'long'
self.entry_price = current_price
self.position_size = 100 # Simplified position sizing
# Set stop loss and take profit levels
self.stop_loss = current_price * 0.99 # 1% stop loss
self.take_profit = current_price * 1.02 # 2% take profit
# Record entry time
self.entry_time = self.data['1m'][-1][0]
# Add to info
info['trade'] = {
'type': 'long',
'entry': current_price,
'entry_time': self.data['1m'][-1][0],
'size': self.position_size,
'stop_loss': self.stop_loss,
'take_profit': self.take_profit
}
elif self.position == 'short':
# Close short position and open long
pnl = self.entry_price - current_price
pnl_percent = pnl / self.entry_price * 100
pnl_dollar = pnl_percent / 100 * self.position_size
# Add to info
info['trade'] = {
'type': 'short',
'entry': self.entry_price,
'exit': current_price,
'entry_time': self.entry_time,
'exit_time': self.data['1m'][-1][0],
'pnl_percent': pnl_percent,
'pnl_dollar': pnl_dollar,
'duration': (self.data['1m'][-1][0] - self.entry_time) / (1000 * 60) # Duration in minutes
}
# Open new long position
self.position = 'long'
self.entry_price = current_price
self.position_size = 100 # Simplified position sizing
# Set stop loss and take profit levels
self.stop_loss = current_price * 0.99 # 1% stop loss
self.take_profit = current_price * 1.02 # 2% take profit
# Record entry time
self.entry_time = self.data['1m'][-1][0]
elif action == 2: # SELL/SHORT
if self.position == 'flat':
# Open a new short position
self.position = 'short'
self.entry_price = current_price
self.position_size = 100 # Simplified position sizing
# Set stop loss and take profit levels
self.stop_loss = current_price * 1.01 # 1% stop loss
self.take_profit = current_price * 0.98 # 2% take profit
# Record entry time
self.entry_time = self.data['1m'][-1][0]
# Add to info
info['trade'] = {
'type': 'short',
'entry': current_price,
'entry_time': self.data['1m'][-1][0],
'size': self.position_size,
'stop_loss': self.stop_loss,
'take_profit': self.take_profit
}
elif self.position == 'long':
# Close long position and open short
pnl = current_price - self.entry_price
pnl_percent = pnl / self.entry_price * 100
pnl_dollar = pnl_percent / 100 * self.position_size
# Add to info
info['trade'] = {
'type': 'long',
'entry': self.entry_price,
'exit': current_price,
'entry_time': self.entry_time,
'exit_time': self.data['1m'][-1][0],
'pnl_percent': pnl_percent,
'pnl_dollar': pnl_dollar,
'duration': (self.data['1m'][-1][0] - self.entry_time) / (1000 * 60) # Duration in minutes
}
# Open new short position
self.position = 'short'
self.entry_price = current_price
self.position_size = 100 # Simplified position sizing
# Set stop loss and take profit levels
self.stop_loss = current_price * 1.01 # 1% stop loss
self.take_profit = current_price * 0.98 # 2% take profit
# Record entry time
self.entry_time = self.data['1m'][-1][0]
# Generate next candle
self._add_new_candle('1m')
# Check if stop loss or take profit has been hit
self._check_sl_tp(info)
# Validate predictions if available
if hasattr(self, 'prediction_history') and len(self.prediction_history) > 0:
self.validate_predictions(self.data['1m'][-1])
# Prepare next state (simplified)
next_state = self._get_state()
# Calculate reward (simplified)
reward = 0
if info['trade'] is not None and 'pnl_dollar' in info['trade']:
reward = info['trade']['pnl_dollar']
# Check if done (simplified)
done = False
return next_state, reward, done, info
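To make the PnL bookkeeping in step (and in _check_sl_tp below) concrete, here is a worked example with illustrative numbers only.

```python
# Long position opened at 100.0 with the simplified position_size of 100 used above.
entry_price = 100.0
take_profit = entry_price * 1.02   # 102.0
position_size = 100

# If the take-profit is hit:
pnl_percent = (take_profit - entry_price) / entry_price * 100   # 2.0 (%)
pnl_dollar = pnl_percent / 100 * position_size                  # 2.0 (dollars)
print(pnl_percent, pnl_dollar)
```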
def _get_state(self):
"""
Get the current state of the environment
Returns:
List representing the current state
"""
# Simplified state representation
state = []
# Add price features
for tf in ['1m', '5m', '15m']:
if tf in self.data:
# Get last 10 candles
candles = self.data[tf][-10:]
# Extract close prices
prices = [candle[4] for candle in candles]
# Calculate price changes
price_changes = [prices[i]/prices[i-1] - 1 for i in range(1, len(prices))]
# Add to state
state.extend(price_changes)
# Add current price relative to SMA
sma_5 = sum(prices[-5:]) / 5
sma_10 = sum(prices) / 10
state.append(prices[-1] / sma_5 - 1)
state.append(prices[-1] / sma_10 - 1)
# Pad state to 100 dimensions
while len(state) < 100:
state.append(0)
# Ensure state has exactly 100 dimensions
if len(state) > 100:
state = state[:100]
return state
def _check_sl_tp(self, info):
"""
Check if stop loss or take profit has been hit
Args:
info: Info dictionary to update
"""
if self.position == 'flat':
return
# Get current price
current_price = self.data['1m'][-1][4]
if self.position == 'long':
# Check stop loss
if current_price <= self.stop_loss:
# Stop loss hit
pnl_percent = (self.stop_loss - self.entry_price) / self.entry_price * 100
pnl_dollar = pnl_percent / 100 * self.position_size
# Add to info
info['trade'] = {
'type': 'long',
'entry': self.entry_price,
'exit': self.stop_loss,
'entry_time': self.entry_time,
'exit_time': self.data['1m'][-1][0],
'pnl_percent': pnl_percent,
'pnl_dollar': pnl_dollar,
'reason': 'stop_loss',
'duration': (self.data['1m'][-1][0] - self.entry_time) / (1000 * 60) # Duration in minutes
}
# Reset position
self.position = 'flat'
self.entry_price = 0
self.position_size = 0
self.stop_loss = 0
self.take_profit = 0
# Check take profit
elif current_price >= self.take_profit:
# Take profit hit
pnl_percent = (self.take_profit - self.entry_price) / self.entry_price * 100
pnl_dollar = pnl_percent / 100 * self.position_size
# Add to info
info['trade'] = {
'type': 'long',
'entry': self.entry_price,
'exit': self.take_profit,
'entry_time': self.entry_time,
'exit_time': self.data['1m'][-1][0],
'pnl_percent': pnl_percent,
'pnl_dollar': pnl_dollar,
'reason': 'take_profit',
'duration': (self.data['1m'][-1][0] - self.entry_time) / (1000 * 60) # Duration in minutes
}
# Reset position
self.position = 'flat'
self.entry_price = 0
self.position_size = 0
self.stop_loss = 0
self.take_profit = 0
elif self.position == 'short':
# Check stop loss
if current_price >= self.stop_loss:
# Stop loss hit
pnl_percent = (self.entry_price - self.stop_loss) / self.entry_price * 100
pnl_dollar = pnl_percent / 100 * self.position_size
# Add to info
info['trade'] = {
'type': 'short',
'entry': self.entry_price,
'exit': self.stop_loss,
'entry_time': self.entry_time,
'exit_time': self.data['1m'][-1][0],
'pnl_percent': pnl_percent,
'pnl_dollar': pnl_dollar,
'reason': 'stop_loss',
'duration': (self.data['1m'][-1][0] - self.entry_time) / (1000 * 60) # Duration in minutes
}
# Reset position
self.position = 'flat'
self.entry_price = 0
self.position_size = 0
self.stop_loss = 0
self.take_profit = 0
# Check take profit
elif current_price <= self.take_profit:
# Take profit hit
pnl_percent = (self.entry_price - self.take_profit) / self.entry_price * 100
pnl_dollar = pnl_percent / 100 * self.position_size
# Add to info
info['trade'] = {
'type': 'short',
'entry': self.entry_price,
'exit': self.take_profit,
'entry_time': self.entry_time,
'exit_time': self.data['1m'][-1][0],
'pnl_percent': pnl_percent,
'pnl_dollar': pnl_dollar,
'reason': 'take_profit',
'duration': (self.data['1m'][-1][0] - self.entry_time) / (1000 * 60) # Duration in minutes
}
# Reset position
self.position = 'flat'
self.entry_price = 0
self.position_size = 0
self.stop_loss = 0
self.take_profit = 0
def validate_predictions(self, new_candle):
"""
Validate previous extrema predictions against new candle data
Args:
new_candle: New candle data to validate against
"""
if not hasattr(self, 'prediction_history') or not self.prediction_history:
return
# Extract candle data
timestamp = new_candle[0]
high_price = new_candle[2]
low_price = new_candle[3]
# Track validation metrics
validated_count = 0
correct_count = 0
# Check each prediction that hasn't been validated yet
for pred in self.prediction_history:
if pred.get('validated', False):
continue
# Check if this prediction's time has come (or passed)
if 'predicted_timestamp' in pred and timestamp >= pred['predicted_timestamp']:
pred['validated'] = True
validated_count += 1
# Check if prediction was correct
if pred['type'] == 'low':
# A low prediction is correct if price went within 0.5% of predicted low
price_diff_percent = abs(low_price - pred['price']) / pred['price'] * 100
pred['actual_price'] = low_price
pred['price_diff_percent'] = price_diff_percent
# Consider correct if within 0.5% or price went lower than predicted
was_correct = price_diff_percent < 0.5 or low_price <= pred['price']
pred['was_correct'] = was_correct
if was_correct:
correct_count += 1
elif pred['type'] == 'high':
# A high prediction is correct if price went within 0.5% of predicted high
price_diff_percent = abs(high_price - pred['price']) / pred['price'] * 100
pred['actual_price'] = high_price
pred['price_diff_percent'] = price_diff_percent
# Consider correct if within 0.5% or price went higher than predicted
was_correct = price_diff_percent < 0.5 or high_price >= pred['price']
pred['was_correct'] = was_correct
if was_correct:
correct_count += 1
# Return validation metrics
if validated_count > 0:
return {
'validated_count': validated_count,
'correct_count': correct_count,
'accuracy': correct_count / validated_count
}
return None
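A quick numeric illustration of the 0.5% tolerance rule applied above; the values are made up.

```python
# A 'low' prediction counts as correct if the actual low comes within 0.5% of it,
# or if price goes below the predicted level.
predicted_low = 100.0
actual_low = 99.4                                                      # price undershot the prediction
diff_percent = abs(actual_low - predicted_low) / predicted_low * 100   # 0.6%
was_correct = diff_percent < 0.5 or actual_low <= predicted_low        # True via the second condition
print(diff_percent, was_correct)
```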
def calculate_pnl(self):
"""
Calculate the current profit/loss of the open position
Returns:
float: Current PnL in dollars, 0 if no position is open
"""
if self.position == 'flat':
return 0.0
current_price = self.data['1m'][-1][4]
if self.position == 'long':
pnl_percent = (current_price - self.entry_price) / self.entry_price * 100
elif self.position == 'short':
pnl_percent = (self.entry_price - current_price) / self.entry_price * 100
else:
return 0.0
pnl_dollar = pnl_percent / 100 * self.position_size
return pnl_dollar
# Example usage
if __name__ == "__main__":
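
The example-usage block above is truncated in this diff, so here is a hedged end-to-end sketch of driving the simulator with the reset/step API defined above; the toy policy, the step count, and the use of the internal _get_state helper are illustrative assumptions.

```python
# Sketch only: relies on ExchangeSimulator(), reset(), step() and the info['trade'] fields shown above.
from exchange_simulator import ExchangeSimulator

env = ExchangeSimulator()
env.reset()
state = env._get_state()          # 100-dim feature vector, zero-padded

total_pnl = 0.0
for _ in range(200):
    action = 1 if state[0] > 0 else 2        # toy policy: buy if the first 1m price-change feature is positive
    state, reward, done, info = env.step(action)
    trade = info.get('trade')
    if trade and 'pnl_dollar' in trade:
        total_pnl += trade['pnl_dollar']
    if done:
        break

print(f"Toy policy PnL: ${total_pnl:.2f}")
```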

File diff suppressed because it is too large

View File

@ -0,0 +1,24 @@
import asyncio
from exchange_simulator import ExchangeSimulator
import logging
# Set up logging
logger = logging.getLogger(__name__)
async def main():
"""
Main function to run the training process.
"""
# Initialize exchange simulator
exchange = ExchangeSimulator()
# Train agent
print("Starting training process...")
# Add your training code here
print("Training complete!")
if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
logger.info("Program terminated by user")

File diff suppressed because it is too large

View File

@ -0,0 +1,30 @@
import asyncio
import logging
from exchange_simulator import ExchangeSimulator
# Set up logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
async def main():
"""
Main function to run the training process.
"""
# Initialize exchange simulator
exchange = ExchangeSimulator()
# Train agent
print("Starting training process...")
# Add your training code here
print("Training complete!")
if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
logger.info("Program terminated by user")
except Exception as e:
logger.error(f"Error running main: {e}")
import traceback
logger.error(traceback.format_exc())
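
A minimal sketch of what the "Add your training code here" placeholder could call, based on the enhanced_train_agent signature used elsewhere in this commit; the episode count is an arbitrary example, and since enhanced_train_agent is synchronous it is simply called from the async main.

```python
# Sketch only: wires the placeholder above to the training entry point from enhanced_training.py.
from enhanced_training import enhanced_train_agent
from exchange_simulator import ExchangeSimulator

exchange = ExchangeSimulator()
rewards, profits, win_rates = enhanced_train_agent(
    exchange=exchange,
    num_episodes=100,        # example value
    continuous=False,
    start_episode=0,
    verbose=True,
)
```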

View File

@ -1,9 +1,15 @@
import argparse
import os
import sys
import asyncio
import torch
from enhanced_training import enhanced_train_agent
from exchange_simulator import ExchangeSimulator
# Fix for Windows asyncio
if sys.platform == 'win32':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
def main():
# Parse command line arguments
parser = argparse.ArgumentParser(description='Enhanced Trading Bot Training')
@ -26,6 +32,9 @@ def main():
parser.add_argument('--refresh-data', action='store_true',
help='Refresh data before training')
parser.add_argument('--verbose', action='store_true',
help='Enable verbose logging')
args = parser.parse_args()
# Set device
@ -51,7 +60,8 @@ def main():
exchange=exchange,
num_episodes=args.episodes,
continuous=False,
- start_episode=0
+ start_episode=0,
+ verbose=args.verbose
)
elif args.mode == 'continuous':
@ -61,7 +71,8 @@ def main():
exchange=exchange,
num_episodes=args.episodes,
continuous=True,
- start_episode=args.start_episode
+ start_episode=args.start_episode,
+ verbose=args.verbose
)
elif args.mode == 'evaluate':

crypto/gogo2/run_main.py (new file, 18 lines)
View File

@ -0,0 +1,18 @@
import asyncio
import logging
from enhanced_training import main
# Set up logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
logger.info("Program terminated by user")
except Exception as e:
logger.error(f"Error running main: {e}")
import traceback
logger.error(traceback.format_exc())

View File

@ -0,0 +1,56 @@
import os
import argparse
import subprocess
import webbrowser
import time
from pathlib import Path
def main():
parser = argparse.ArgumentParser(description='Visualize TensorBoard logs')
parser.add_argument('--logdir', type=str, default='./logs', help='Directory containing TensorBoard logs')
parser.add_argument('--port', type=int, default=6006, help='Port for TensorBoard server')
args = parser.parse_args()
log_dir = Path(args.logdir)
if not log_dir.exists():
print(f"Log directory {log_dir} does not exist. Creating it...")
log_dir.mkdir(parents=True, exist_ok=True)
# Check if TensorBoard is installed
try:
subprocess.run(['tensorboard', '--version'], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except (subprocess.CalledProcessError, FileNotFoundError):
print("TensorBoard not found. Installing...")
subprocess.run(['pip', 'install', 'tensorboard'], check=True)
# Start TensorBoard server
print(f"Starting TensorBoard server on port {args.port}...")
tensorboard_process = subprocess.Popen(
['tensorboard', '--logdir', str(log_dir), '--port', str(args.port)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
# Wait for TensorBoard to start
time.sleep(3)
# Open browser
url = f"http://localhost:{args.port}"
print(f"Opening TensorBoard in browser: {url}")
webbrowser.open(url)
print("TensorBoard is running. Press Ctrl+C to stop.")
try:
# Keep the script running until interrupted
while True:
time.sleep(1)
except KeyboardInterrupt:
print("Stopping TensorBoard server...")
tensorboard_process.terminate()
tensorboard_process.wait()
print("TensorBoard server stopped.")
if __name__ == "__main__":
main()
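
For reference, a small sketch of writing the kind of scalars this script visualizes under ./logs; the tag names mirror those used in the training code in this commit, and the values are fabricated.

```python
from tensorboardX import SummaryWriter

# Write a few fake training metrics so TensorBoard has something to display.
writer = SummaryWriter('./logs/example_run')
for episode in range(10):
    writer.add_scalar('Metrics/Reward', 0.1 * episode, episode)
    writer.add_scalar('Metrics/PnL', 2.5 * episode, episode)
    writer.add_scalar('Metrics/WinRate', 50.0 + episode, episode)
    writer.add_scalar('Metrics/Epsilon', max(0.05, 1.0 - 0.1 * episode), episode)
writer.close()
```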