improvments and fixes

2025-03-12 16:52:49 +02:00
parent 506458d55e
commit 2e901e18f2
8 changed files with 2906 additions and 650 deletions
--- a/crypto/gogo2/_notes.md
+++ b/crypto/gogo2/_notes.md
@@ -1,3 +1,5 @@
 pip install torch-tb-profiler
 ensure we use GPU if available to train faster. during training we need to have RL loop that looks at streaming data, and retrospective backtesting/training on predictions. sincr the start of the traing we're only loosing. implement robust penalty and analysis when closing a loosing trade and improve the reward function.
--- a/crypto/gogo2/data_cache.py
+++ b/crypto/gogo2/data_cache.py
@@ -0,0 +1,319 @@
 import os
 import json
 import time
 import pandas as pd
 import numpy as np
 from datetime import datetime, timedelta
 import logging
 # Set up logging
 logger = logging.getLogger('trading_bot')
 class OHLCVCache:
    """
    A simple cache for OHLCV data from exchanges.
    Stores data in a structured format and provides backup when exchange is unavailable.
    """
    def __init__(self, cache_dir="cache", max_age_hours=24):
        """
        Initialize the OHLCV cache.
        Args:
            cache_dir: Directory to store cache files
            max_age_hours: Maximum age of cached data in hours before considered stale
        """
        self.cache_dir = cache_dir
        self.max_age_seconds = max_age_hours * 3600
        # Create cache directory if it doesn't exist
        os.makedirs(cache_dir, exist_ok=True)
        # In-memory cache for faster access
        self.memory_cache = {}
    def _get_cache_filename(self, symbol, timeframe):
        """Generate a standardized filename for the cache file"""
        # Replace / with _ in symbol name (e.g., ETH/USDT -> ETH_USDT)
        safe_symbol = symbol.replace('/', '_')
        return os.path.join(self.cache_dir, f"{safe_symbol}_{timeframe}.json")
    def save(self, data, symbol, timeframe):
        """
        Save OHLCV data to cache.
        Args:
            data: List of dictionaries containing OHLCV data
            symbol: Trading pair symbol (e.g., 'ETH/USDT')
            timeframe: Timeframe of the data (e.g., '1m', '5m', '1h')
        """
        if not data:
            logger.warning(f"No data to cache for {symbol} ({timeframe})")
            return False
        try:
            # Convert data to a serializable format
            serializable_data = []
            for candle in data:
                serializable_data.append({
                    'timestamp': candle['timestamp'],
                    'open': float(candle['open']),
                    'high': float(candle['high']),
                    'low': float(candle['low']),
                    'close': float(candle['close']),
                    'volume': float(candle['volume'])
                })
            # Create cache entry with metadata
            cache_entry = {
                'symbol': symbol,
                'timeframe': timeframe,
                'last_updated': int(time.time()),
                'data': serializable_data
            }
            # Save to file
            filename = self._get_cache_filename(symbol, timeframe)
            with open(filename, 'w') as f:
                json.dump(cache_entry, f)
            # Update in-memory cache
            cache_key = f"{symbol}_{timeframe}"
            self.memory_cache[cache_key] = cache_entry
            logger.info(f"Cached {len(data)} candles for {symbol} ({timeframe})")
            return True
        except Exception as e:
            logger.error(f"Error saving data to cache: {e}")
            return False
    def load(self, symbol, timeframe, max_age_override=None):
        """
        Load OHLCV data from cache.
        Args:
            symbol: Trading pair symbol (e.g., 'ETH/USDT')
            timeframe: Timeframe of the data (e.g., '1m', '5m', '1h')
            max_age_override: Override the default max age (in seconds)
        Returns:
            List of dictionaries containing OHLCV data, or None if cache is missing or stale
        """
        cache_key = f"{symbol}_{timeframe}"
        max_age = max_age_override if max_age_override is not None else self.max_age_seconds
        try:
            # Check in-memory cache first
            if cache_key in self.memory_cache:
                cache_entry = self.memory_cache[cache_key]
                # Check if cache is fresh
                cache_age = int(time.time()) - cache_entry['last_updated']
                if cache_age <= max_age:
                    logger.info(f"Using in-memory cache for {symbol} ({timeframe}), age: {cache_age//60} minutes")
                    return cache_entry['data']
            # Check file cache
            filename = self._get_cache_filename(symbol, timeframe)
            if not os.path.exists(filename):
                logger.info(f"No cache file found for {symbol} ({timeframe})")
                return None
            # Load cache file
            with open(filename, 'r') as f:
                cache_entry = json.load(f)
            # Check if cache is fresh
            cache_age = int(time.time()) - cache_entry['last_updated']
            if cache_age > max_age:
                logger.info(f"Cache for {symbol} ({timeframe}) is stale ({cache_age//60} minutes old)")
                return None
            # Update in-memory cache
            self.memory_cache[cache_key] = cache_entry
            logger.info(f"Loaded {len(cache_entry['data'])} candles from cache for {symbol} ({timeframe})")
            return cache_entry['data']
        except Exception as e:
            logger.error(f"Error loading data from cache: {e}")
            return None
    def append(self, new_candle, symbol, timeframe):
        """
        Append a new candle to the cached data.
        Args:
            new_candle: Dictionary containing a single OHLCV candle
            symbol: Trading pair symbol (e.g., 'ETH/USDT')
            timeframe: Timeframe of the data (e.g., '1m', '5m', '1h')
        Returns:
            Boolean indicating success
        """
        try:
            # Load existing data
            data = self.load(symbol, timeframe, max_age_override=float('inf'))  # Ignore age for append
            if data is None:
                data = []
            # Check if the candle already exists (same timestamp)
            for i, candle in enumerate(data):
                if candle['timestamp'] == new_candle['timestamp']:
                    # Update existing candle
                    data[i] = {
                        'timestamp': new_candle['timestamp'],
                        'open': float(new_candle['open']),
                        'high': float(new_candle['high']),
                        'low': float(new_candle['low']),
                        'close': float(new_candle['close']),
                        'volume': float(new_candle['volume'])
                    }
                    # Save updated data
                    return self.save(data, symbol, timeframe)
            # Append new candle
            data.append({
                'timestamp': new_candle['timestamp'],
                'open': float(new_candle['open']),
                'high': float(new_candle['high']),
                'low': float(new_candle['low']),
                'close': float(new_candle['close']),
                'volume': float(new_candle['volume'])
            })
            # Save updated data
            return self.save(data, symbol, timeframe)
        except Exception as e:
            logger.error(f"Error appending candle to cache: {e}")
            return False
    def get_latest_timestamp(self, symbol, timeframe):
        """
        Get the timestamp of the most recent candle in the cache.
        Args:
            symbol: Trading pair symbol (e.g., 'ETH/USDT')
            timeframe: Timeframe of the data (e.g., '1m', '5m', '1h')
        Returns:
            Timestamp (milliseconds) of the most recent candle, or None if cache is empty
        """
        data = self.load(symbol, timeframe, max_age_override=float('inf'))  # Ignore age for this check
        if not data:
            return None
        # Find the most recent timestamp
        latest_timestamp = max(candle['timestamp'] for candle in data)
        return latest_timestamp
    def clear(self, symbol=None, timeframe=None):
        """
        Clear cache for a specific symbol and timeframe, or all cache if not specified.
        Args:
            symbol: Trading pair symbol (e.g., 'ETH/USDT'), or None to clear all symbols
            timeframe: Timeframe of the data (e.g., '1m', '5m', '1h'), or None to clear all timeframes
        Returns:
            Number of cache files deleted
        """
        count = 0
        try:
            if symbol and timeframe:
                # Clear specific cache
                filename = self._get_cache_filename(symbol, timeframe)
                if os.path.exists(filename):
                    os.remove(filename)
                    count = 1
                # Clear from memory cache
                cache_key = f"{symbol}_{timeframe}"
                if cache_key in self.memory_cache:
                    del self.memory_cache[cache_key]
            else:
                # Clear all matching caches
                for filename in os.listdir(self.cache_dir):
                    file_path = os.path.join(self.cache_dir, filename)
                    # Skip directories
                    if not os.path.isfile(file_path):
                        continue
                    # Check if file matches the filter
                    should_delete = True
                    if symbol:
                        safe_symbol = symbol.replace('/', '_')
                        if not filename.startswith(f"{safe_symbol}_"):
                            should_delete = False
                    if timeframe:
                        if not filename.endswith(f"_{timeframe}.json"):
                            should_delete = False
                    # Delete file if it matches the filter
                    if should_delete:
                        os.remove(file_path)
                        count += 1
                # Clear memory cache
                keys_to_delete = []
                for cache_key in self.memory_cache:
                    should_delete = True
                    if symbol:
                        if not cache_key.startswith(f"{symbol}_"):
                            should_delete = False
                    if timeframe:
                        if not cache_key.endswith(f"_{timeframe}"):
                            should_delete = False
                    if should_delete:
                        keys_to_delete.append(cache_key)
                for key in keys_to_delete:
                    del self.memory_cache[key]
            logger.info(f"Cleared {count} cache files")
            return count
        except Exception as e:
            logger.error(f"Error clearing cache: {e}")
            return 0
    def to_dataframe(self, symbol, timeframe):
        """
        Convert cached OHLCV data to a pandas DataFrame.
        Args:
            symbol: Trading pair symbol (e.g., 'ETH/USDT')
            timeframe: Timeframe of the data (e.g., '1m', '5m', '1h')
        Returns:
            pandas DataFrame with OHLCV data, or None if cache is missing
        """
        data = self.load(symbol, timeframe, max_age_override=float('inf'))  # Ignore age for conversion
        if not data:
            return None
        # Convert to DataFrame
        df = pd.DataFrame(data)
        # Convert timestamp to datetime
        df['datetime'] = pd.to_datetime(df['timestamp'], unit='ms')
        # Set datetime as index
        df.set_index('datetime', inplace=True)
        return df
 # Create a global instance for easy access
 ohlcv_cache = OHLCVCache() 
--- a/crypto/gogo2/enhanced_models.py
+++ b/crypto/gogo2/enhanced_models.py
@@ -291,7 +291,7 @@ class EnhancedReplayBuffer:
    def update_priorities(self, indices, td_errors):
        for idx, td_error in zip(indices, td_errors):
            # Update priority based on TD error
-            priority = abs(td_error) + 1e-5  # Small constant to ensure non-zero priority
+            priority = float(abs(td_error) + 1e-5)  # Small constant to ensure non-zero priority
            self.priorities[idx] = priority
            self.max_priority = max(self.max_priority, priority)
--- a/crypto/gogo2/enhanced_training.py
+++ b/crypto/gogo2/enhanced_training.py
@@ -0,0 +1,765 @@
 import os
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.amp import GradScaler, autocast
 import numpy as np
 import matplotlib.pyplot as plt
 from datetime import datetime
 from tensorboardX import SummaryWriter
 # Import our enhanced models
 from enhanced_models import EnhancedPricePredictionModel, EnhancedDQN, EnhancedReplayBuffer, train_price_predictor, prepare_multi_timeframe_data
 # Constants
 TIMEFRAMES = ['1m', '15m', '1h']
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 LEARNING_RATE = 1e-4
 BATCH_SIZE = 64
 GAMMA = 0.99
 REPLAY_BUFFER_SIZE = 100000
 TARGET_UPDATE = 10
 NUM_EPISODES = 200
 MAX_STEPS_PER_EPISODE = 1000
 EPSILON_START = 1.0
 EPSILON_END = 0.01
 EPSILON_DECAY = 0.995
 SAVE_INTERVAL = 10
 CONTINUOUS_MODE = True
 CONTINUOUS_START_EPISODE = 0
 def setup_tensorboard():
    """Set up TensorBoard for logging training metrics"""
    current_time = datetime.now().strftime('%Y%m%d-%H%M%S')
    log_dir = os.path.join('runs', current_time)
    writer = SummaryWriter(log_dir)
    return writer
 def save_models(price_model, dqn_model, optimizer, episode, rewards, profits, win_rates, best_reward, best_pnl, best_winrate):
    """Save model checkpoints and clean up old ones to keep only top 5 and best PnL"""
    # Create models directory if it doesn't exist
    os.makedirs('models', exist_ok=True)
    # Save latest models
    torch.save({
        'price_model_state_dict': price_model.state_dict(),
        'dqn_model_state_dict': dqn_model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'episode': episode,
        'rewards': rewards,
        'profits': profits,
        'win_rates': win_rates
    }, 'models/enhanced_trading_agent_latest.pt')
    # Save continuous training checkpoint
    continuous_model_path = f'models/enhanced_trading_agent_continuous_{episode}.pt'
    torch.save({
        'price_model_state_dict': price_model.state_dict(),
        'dqn_model_state_dict': dqn_model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'episode': episode,
        'rewards': rewards,
        'profits': profits,
        'win_rates': win_rates
    }, continuous_model_path)
    # Save best models
    if rewards[-1] > best_reward:
        best_reward = rewards[-1]
        torch.save({
            'price_model_state_dict': price_model.state_dict(),
            'dqn_model_state_dict': dqn_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'episode': episode,
            'rewards': rewards,
            'profits': profits,
            'win_rates': win_rates
        }, 'models/enhanced_trading_agent_best_reward.pt')
    if profits[-1] > best_pnl:
        best_pnl = profits[-1]
        torch.save({
            'price_model_state_dict': price_model.state_dict(),
            'dqn_model_state_dict': dqn_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'episode': episode,
            'rewards': rewards,
            'profits': profits,
            'win_rates': win_rates
        }, 'models/enhanced_trading_agent_best_pnl.pt')
    if win_rates[-1] > best_winrate:
        best_winrate = win_rates[-1]
        torch.save({
            'price_model_state_dict': price_model.state_dict(),
            'dqn_model_state_dict': dqn_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'episode': episode,
            'rewards': rewards,
            'profits': profits,
            'win_rates': win_rates
        }, 'models/enhanced_trading_agent_best_winrate.pt')
    # Save final model at the end of training
    if episode == NUM_EPISODES - 1:
        torch.save({
            'price_model_state_dict': price_model.state_dict(),
            'dqn_model_state_dict': dqn_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'episode': episode,
            'rewards': rewards,
            'profits': profits,
            'win_rates': win_rates
        }, 'models/enhanced_trading_agent_final.pt')
    # Clean up old models - keep only top 5 most recent and best PnL
    cleanup_model_files()
    return best_reward, best_pnl, best_winrate
 def cleanup_model_files():
    """Keep only the top 5 most recent continuous models and the best models"""
    # Files we always want to keep
    essential_files = [
        'enhanced_trading_agent_latest.pt',
        'enhanced_trading_agent_best_reward.pt',
        'enhanced_trading_agent_best_pnl.pt',
        'enhanced_trading_agent_best_winrate.pt',
        'enhanced_trading_agent_final.pt'
    ]
    # Get all continuous training model files
    continuous_files = []
    for file in os.listdir('models'):
        if file.startswith('enhanced_trading_agent_continuous_') and file.endswith('.pt'):
            continuous_files.append(file)
    # Sort continuous files by episode number (newest first)
    if continuous_files:
        try:
            continuous_files.sort(key=lambda x: int(x.split('_')[-1].split('.')[0]), reverse=True)
            # Keep only the 5 most recent continuous files
            files_to_keep = essential_files + continuous_files[:5]
        except (ValueError, IndexError):
            # Handle case where filename format is unexpected
            print("Warning: Could not sort continuous files by episode number. Keeping all continuous files.")
            files_to_keep = essential_files + continuous_files
    else:
        files_to_keep = essential_files
    # Delete all other model files
    for file in os.listdir('models'):
        if file.endswith('.pt') and file not in files_to_keep:
            try:
                os.remove(os.path.join('models', file))
                print(f"Deleted old model file: {file}")
            except Exception as e:
                print(f"Error deleting {file}: {e}")
 def plot_training_results(rewards, profits, win_rates, episode):
    """Plot training metrics"""
    plt.figure(figsize=(15, 15))
    # Plot rewards
    plt.subplot(3, 1, 1)
    plt.plot(rewards)
    plt.title('Average Reward per Episode')
    plt.xlabel('Episode')
    plt.ylabel('Reward')
    # Plot profits
    plt.subplot(3, 1, 2)
    plt.plot(profits)
    plt.title('Profit/Loss per Episode')
    plt.xlabel('Episode')
    plt.ylabel('PnL ($)')
    # Plot win rates
    plt.subplot(3, 1, 3)
    plt.plot(win_rates)
    plt.title('Win Rate per Episode')
    plt.xlabel('Episode')
    plt.ylabel('Win Rate (%)')
    plt.ylim(0, 100)
    plt.tight_layout()
    plt.savefig('training_results.png')
    # Also save episode-specific plots periodically
    if episode % 20 == 0:
        os.makedirs('visualizations', exist_ok=True)
        plt.savefig(f'visualizations/training_episode_{episode}.png')
    plt.close()
 def load_checkpoint(price_model, dqn_model, optimizer, episode=None):
    """Load model checkpoint for continuous training"""
    if episode is not None:
        checkpoint_path = f'models/enhanced_trading_agent_continuous_{episode}.pt'
    else:
        checkpoint_path = 'models/enhanced_trading_agent_latest.pt'
    if os.path.exists(checkpoint_path):
        print(f"Loading checkpoint from {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path, map_location=DEVICE)
        price_model.load_state_dict(checkpoint['price_model_state_dict'])
        dqn_model.load_state_dict(checkpoint['dqn_model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_episode = checkpoint['episode'] + 1
        rewards = checkpoint['rewards']
        profits = checkpoint['profits']
        win_rates = checkpoint['win_rates']
        print(f"Resuming training from episode {start_episode}")
        return start_episode, rewards, profits, win_rates
    else:
        print("No checkpoint found, starting training from scratch")
        return 0, [], [], []
 def enhanced_train_agent(exchange, num_episodes=NUM_EPISODES, continuous=CONTINUOUS_MODE, start_episode=CONTINUOUS_START_EPISODE):
    """
    Train the enhanced trading agent using multi-timeframe data
    Args:
        exchange: Exchange object to fetch data from
        num_episodes: Number of episodes to train for
        continuous: Whether to continue training from a checkpoint
        start_episode: Episode to start from if continuous training
    """
    print(f"Training on device: {DEVICE}")
    # Set up TensorBoard
    writer = setup_tensorboard()
    # Initialize models
    state_dim = 100  # Increased state dimension for multi-timeframe features
    action_dim = 3   # Buy, Sell, Hold
    price_model = EnhancedPricePredictionModel(
        input_dim=2,  # Price and volume
        hidden_dim=256,
        num_layers=3,
        output_dim=5,  # Predict next 5 candles
        num_timeframes=len(TIMEFRAMES)
    ).to(DEVICE)
    dqn_model = EnhancedDQN(
        state_dim=state_dim,
        action_dim=action_dim,
        hidden_dim=512
    ).to(DEVICE)
    target_dqn = EnhancedDQN(
        state_dim=state_dim,
        action_dim=action_dim,
        hidden_dim=512
    ).to(DEVICE)
    # Copy initial weights to target network
    target_dqn.load_state_dict(dqn_model.state_dict())
    # Initialize optimizer
    optimizer = optim.Adam(list(price_model.parameters()) + list(dqn_model.parameters()), lr=LEARNING_RATE)
    # Initialize replay buffer
    replay_buffer = EnhancedReplayBuffer(
        capacity=REPLAY_BUFFER_SIZE,
        alpha=0.6,
        beta=0.4,
        beta_increment=0.001,
        n_step=3,
        gamma=GAMMA
    )
    # Initialize gradient scaler for mixed precision training
    scaler = GradScaler(enabled=(DEVICE.type == 'cuda'))
    # Initialize tracking variables
    rewards = []
    profits = []
    win_rates = []
    best_reward = float('-inf')
    best_pnl = float('-inf')
    best_winrate = float('-inf')
    # Load checkpoint if continuous training
    if continuous:
        start_episode, rewards, profits, win_rates = load_checkpoint(
            price_model, dqn_model, optimizer, start_episode
        )
    # Prepare multi-timeframe data for price prediction model training
    data_loaders = prepare_multi_timeframe_data(exchange, TIMEFRAMES)
    # Pre-train price prediction model
    print("Pre-training price prediction model...")
    train_price_predictor(price_model, data_loaders, optimizer, DEVICE, epochs=5)
    # Main training loop
    epsilon = EPSILON_START
    for episode in range(start_episode, num_episodes):
        print(f"Episode {episode+1}/{num_episodes}")
        # Reset environment
        state = initialize_state(exchange, TIMEFRAMES)
        total_reward = 0
        trades = []
        wins = 0
        losses = 0
        # Episode loop
        for step in range(MAX_STEPS_PER_EPISODE):
            # Epsilon-greedy action selection
            if np.random.random() < epsilon:
                action = np.random.randint(0, action_dim)
            else:
                with torch.no_grad():
                    state_tensor = torch.FloatTensor(state).unsqueeze(0).to(DEVICE)
                    q_values, _, _ = dqn_model(state_tensor)
                    action = q_values.argmax().item()
            # Execute action and get next state and reward
            next_state, reward, done, trade_info = step_environment(
                exchange, state, action, price_model, TIMEFRAMES, DEVICE
            )
            # Store transition in replay buffer
            replay_buffer.push(
                torch.FloatTensor(state),
                action,
                reward,
                torch.FloatTensor(next_state),
                done
            )
            # Update state and accumulate reward
            state = next_state
            total_reward += reward
            # Track trade outcomes
            if trade_info is not None:
                trades.append(trade_info)
                if trade_info['pnl'] > 0:
                    wins += 1
                elif trade_info['pnl'] < 0:
                    losses += 1
            # Learn from experiences if enough samples
            if len(replay_buffer) > BATCH_SIZE:
                learn(dqn_model, target_dqn, replay_buffer, optimizer, scaler, DEVICE)
            if done:
                break
        # Update target network
        if episode % TARGET_UPDATE == 0:
            target_dqn.load_state_dict(dqn_model.state_dict())
        # Calculate episode metrics
        avg_reward = total_reward / (step + 1)
        total_pnl = sum(trade['pnl'] for trade in trades) if trades else 0
        win_rate = (wins / (wins + losses) * 100) if (wins + losses) > 0 else 0
        # Decay epsilon
        epsilon = max(EPSILON_END, epsilon * EPSILON_DECAY)
        # Track metrics
        rewards.append(avg_reward)
        profits.append(total_pnl)
        win_rates.append(win_rate)
        # Log to TensorBoard
        writer.add_scalar('Training/Reward', avg_reward, episode)
        writer.add_scalar('Training/Profit', total_pnl, episode)
        writer.add_scalar('Training/WinRate', win_rate, episode)
        writer.add_scalar('Training/Epsilon', epsilon, episode)
        # Print episode summary
        print(f"Episode {episode+1} - Avg Reward: {avg_reward:.2f}, PnL: ${total_pnl:.2f}, Win Rate: {win_rate:.1f}%")
        # Save models and plot results
        if episode % SAVE_INTERVAL == 0 or episode == num_episodes - 1:
            best_reward, best_pnl, best_winrate = save_models(
                price_model, dqn_model, optimizer, episode, 
                rewards, profits, win_rates, 
                best_reward, best_pnl, best_winrate
            )
            plot_training_results(rewards, profits, win_rates, episode)
    # Close TensorBoard writer
    writer.close()
    # Final save and plot
    best_reward, best_pnl, best_winrate = save_models(
        price_model, dqn_model, optimizer, num_episodes - 1, 
        rewards, profits, win_rates, 
        best_reward, best_pnl, best_winrate
    )
    plot_training_results(rewards, profits, win_rates, num_episodes - 1)
    print("Training complete!")
    return price_model, dqn_model
 def learn(dqn, target_dqn, replay_buffer, optimizer, scaler, device):
    """Update the DQN model using experiences from the replay buffer"""
    # Sample from replay buffer
    states, actions, rewards, next_states, dones, indices, weights = replay_buffer.sample(BATCH_SIZE)
    # Move to device
    states = states.to(device)
    actions = actions.to(device)
    rewards = rewards.to(device)
    next_states = next_states.to(device)
    dones = dones.to(device)
    weights = weights.to(device)
    # Get current Q values
    if device.type == 'cuda':
        with autocast(device_type='cuda', enabled=True):
            current_q_values, _, _ = dqn(states)
            current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
            # Compute target Q values
            with torch.no_grad():
                next_q_values, _, _ = target_dqn(next_states)
                max_next_q_values = next_q_values.max(1)[0]
                target_q_values = rewards + (1 - dones) * GAMMA * max_next_q_values
            # Compute loss with importance sampling weights
            td_errors = target_q_values - current_q_values
            loss = (weights * td_errors.pow(2)).mean()
    else:
        # CPU version without autocast
        current_q_values, _, _ = dqn(states)
        current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
        # Compute target Q values
        with torch.no_grad():
            next_q_values, _, _ = target_dqn(next_states)
            max_next_q_values = next_q_values.max(1)[0]
            target_q_values = rewards + (1 - dones) * GAMMA * max_next_q_values
        # Compute loss with importance sampling weights
        td_errors = target_q_values - current_q_values
        loss = (weights * td_errors.pow(2)).mean()
    # Update priorities in replay buffer
    replay_buffer.update_priorities(indices, td_errors.abs().detach().cpu().numpy())
    # Optimize the model with mixed precision
    optimizer.zero_grad()
    if device.type == 'cuda':
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(dqn.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()
    else:
        # CPU version without scaler
        loss.backward()
        torch.nn.utils.clip_grad_norm_(dqn.parameters(), max_norm=1.0)
        optimizer.step()
 def initialize_state(exchange, timeframes):
    """Initialize the state with data from multiple timeframes"""
    # Fetch data for each timeframe
    timeframe_data = {}
    for tf in timeframes:
        candles = exchange.fetch_ohlcv(timeframe=tf, limit=30)
        timeframe_data[tf] = candles
    # Extract features from each timeframe
    state = []
    for tf in timeframes:
        candles = timeframe_data[tf]
        # Price features
        prices = [candle[4] for candle in candles[-10:]]  # Last 10 close prices
        price_changes = [prices[i]/prices[i-1] - 1 for i in range(1, len(prices))]
        # Volume features
        volumes = [candle[5] for candle in candles[-10:]]  # Last 10 volumes
        volume_changes = [volumes[i]/volumes[i-1] - 1 for i in range(1, len(volumes))]
        # Technical indicators
        # Simple Moving Averages
        sma_5 = sum(prices[-5:]) / 5
        sma_10 = sum(prices) / 10
        # Relative Strength Index (simplified)
        gains = [max(0, price_changes[i]) for i in range(len(price_changes))]
        losses = [max(0, -price_changes[i]) for i in range(len(price_changes))]
        avg_gain = sum(gains) / len(gains)
        avg_loss = sum(losses) / len(losses)
        rs = avg_gain / (avg_loss + 1e-10)  # Avoid division by zero
        rsi = 100 - (100 / (1 + rs))
        # Add features to state
        state.extend(price_changes)  # 9 features
        state.extend(volume_changes)  # 9 features
        state.append(sma_5 / prices[-1] - 1)  # 1 feature
        state.append(sma_10 / prices[-1] - 1)  # 1 feature
        state.append(rsi / 100)  # 1 feature
    # Add market regime features
    # This is a placeholder - in a real implementation, you would use the market_regime_classifier
    # from the DQN model to predict the current market regime
    state.extend([0, 0, 0])  # 3 features for market regime (one-hot encoded)
    # Add additional features to reach the expected dimension of 100
    # Calculate more technical indicators
    for tf in timeframes:
        candles = timeframe_data[tf]
        prices = [candle[4] for candle in candles[-20:]]  # Last 20 close prices
        # Bollinger Bands
        window = 20
        if len(prices) >= window:
            sma_20 = sum(prices[-window:]) / window
            std_dev = (sum((price - sma_20) ** 2 for price in prices[-window:]) / window) ** 0.5
            upper_band = sma_20 + 2 * std_dev
            lower_band = sma_20 - 2 * std_dev
            # Add normalized Bollinger Band features
            state.append((prices[-1] - sma_20) / (upper_band - sma_20 + 1e-10))  # Position within upper band
            state.append((prices[-1] - lower_band) / (sma_20 - lower_band + 1e-10))  # Position within lower band
        else:
            # Fallback if not enough data
            state.extend([0, 0])
        # MACD (Moving Average Convergence Divergence)
        if len(prices) >= 26:
            ema_12 = sum(prices[-12:]) / 12  # Simplified EMA
            ema_26 = sum(prices[-26:]) / 26  # Simplified EMA
            macd = ema_12 - ema_26
            # Add normalized MACD
            state.append(macd / prices[-1])
        else:
            # Fallback if not enough data
            state.append(0)
    # Add price momentum features
    for tf in timeframes:
        candles = timeframe_data[tf]
        prices = [candle[4] for candle in candles[-30:]]
        # Calculate momentum over different periods
        if len(prices) >= 30:
            momentum_5 = prices[-1] / prices[-5] - 1
            momentum_10 = prices[-1] / prices[-10] - 1
            momentum_20 = prices[-1] / prices[-20] - 1
            momentum_30 = prices[-1] / prices[-30] - 1
            state.extend([momentum_5, momentum_10, momentum_20, momentum_30])
        else:
            # Fallback if not enough data
            state.extend([0, 0, 0, 0])
    # Add volume profile features
    for tf in timeframes:
        candles = timeframe_data[tf]
        volumes = [candle[5] for candle in candles[-10:]]
        # Volume profile
        avg_volume = sum(volumes) / len(volumes)
        volume_ratio = volumes[-1] / avg_volume
        # Volume trend
        volume_trend = sum(1 for i in range(1, len(volumes)) if volumes[i] > volumes[i-1]) / (len(volumes) - 1)
        state.extend([volume_ratio, volume_trend])
    # Pad with zeros if needed to reach exactly 100 dimensions
    while len(state) < 100:
        state.append(0)
    # Ensure state has exactly 100 dimensions
    if len(state) > 100:
        state = state[:100]
    assert len(state) == 100, f"State dimension mismatch: {len(state)} != 100"
    return state
 def step_environment(exchange, state, action, price_model, timeframes, device):
    """
    Execute action in the environment and return next state, reward, done flag, and trade info
    Args:
        exchange: Exchange object to interact with
        state: Current state
        action: Action to take (0: Hold, 1: Buy, 2: Sell)
        price_model: Price prediction model
        timeframes: List of timeframes to use
        device: Device to run models on
    Returns:
        next_state: Next state after taking action
        reward: Reward received
        done: Whether episode is done
        trade_info: Information about the trade (if any)
    """
    # Fetch latest data for each timeframe
    timeframe_data = {}
    for tf in timeframes:
        candles = exchange.fetch_ohlcv(timeframe=tf, limit=30)
        timeframe_data[tf] = candles
    # Prepare inputs for price prediction model
    price_inputs = []
    for tf in timeframes:
        candles = timeframe_data[tf]
        # Extract price and volume data
        input_data = torch.tensor([
            [candle[4], candle[5]] for candle in candles[-30:]  # Last 30 candles
        ], dtype=torch.float32).unsqueeze(0).to(device)  # Add batch dimension
        price_inputs.append(input_data)
    # Get price and extrema predictions
    with torch.no_grad():
        price_pred, extrema_logits, volume_pred = price_model(price_inputs)
    # Convert predictions to numpy
    price_pred = price_pred.cpu().numpy()[0]  # Remove batch dimension
    extrema_probs = torch.sigmoid(extrema_logits).cpu().numpy()[0]
    volume_pred = volume_pred.cpu().numpy()[0]
    # Execute action
    current_price = timeframe_data['1m'][-1][4]  # Current close price
    trade_info = None
    reward = 0
    if action == 1:  # Buy
        # Check if we're at a predicted low point (good time to buy)
        is_predicted_low = any(extrema_probs[i*2+1] > 0.7 for i in range(5))
        # Calculate entry quality based on predictions
        entry_quality = 0.5  # Default quality
        if is_predicted_low:
            entry_quality += 0.2  # Bonus for buying at predicted low
        # Check volume confirmation
        volume_increasing = volume_pred[0] > timeframe_data['1m'][-1][5]
        if volume_increasing:
            entry_quality += 0.1  # Bonus for increasing volume
        # Execute buy order
        # In a real implementation, this would interact with the exchange
        # For now, we'll simulate the trade
        trade_info = {
            'action': 'buy',
            'price': current_price,
            'size': 100 * entry_quality,  # Size based on entry quality
            'entry_quality': entry_quality,
            'pnl': 0  # Will be updated later
        }
        # Calculate reward
        # Base reward for taking action
        reward = 1
        # Bonus for buying at predicted low
        if is_predicted_low:
            reward += 5
            print("Trading at predicted low - additional reward")
        # Bonus for volume confirmation
        if volume_increasing:
            reward += 2
            print("Trading with high volume - additional reward")
    elif action == 2:  # Sell
        # Check if we're at a predicted high point (good time to sell)
        is_predicted_high = any(extrema_probs[i*2] > 0.7 for i in range(5))
        # Calculate entry quality based on predictions
        entry_quality = 0.5  # Default quality
        if is_predicted_high:
            entry_quality += 0.2  # Bonus for selling at predicted high
        # Check volume confirmation
        volume_increasing = volume_pred[0] > timeframe_data['1m'][-1][5]
        if volume_increasing:
            entry_quality += 0.1  # Bonus for increasing volume
        # Execute sell order
        # In a real implementation, this would interact with the exchange
        # For now, we'll simulate the trade
        trade_info = {
            'action': 'sell',
            'price': current_price,
            'size': 100 * entry_quality,  # Size based on entry quality
            'entry_quality': entry_quality,
            'pnl': 0  # Will be updated later
        }
        # Calculate reward
        # Base reward for taking action
        reward = 1
        # Bonus for selling at predicted high
        if is_predicted_high:
            reward += 5
            print("Trading at predicted high - additional reward")
        # Bonus for volume confirmation
        if volume_increasing:
            reward += 2
            print("Trading with high volume - additional reward")
    else:  # Hold
        # Small reward for holding
        reward = 0.1
    # Simulate trade outcome
    if trade_info is not None:
        # In a real implementation, this would be based on actual market movement
        # For now, we'll use the price prediction to simulate the outcome
        future_price = price_pred[0]  # Price in the next candle
        if trade_info['action'] == 'buy':
            # For buy, profit if price goes up
            pnl_pct = (future_price / current_price - 1) * 100
            trade_info['pnl'] = pnl_pct * trade_info['size'] / 100
        else:  # sell
            # For sell, profit if price goes down
            pnl_pct = (1 - future_price / current_price) * 100
            trade_info['pnl'] = pnl_pct * trade_info['size'] / 100
        # Adjust reward based on trade outcome
        reward += trade_info['pnl'] * 10  # Scale PnL for reward
    # Update state
    next_state = initialize_state(exchange, timeframes)
    # Check if episode is done
    # In a real implementation, this would be based on episode length or other criteria
    done = False
    return next_state, reward, done, trade_info
 # Main function to run training
 def main():
    from exchange_simulator import ExchangeSimulator
    # Initialize exchange simulator
    exchange = ExchangeSimulator()
    # Train agent
    price_model, dqn_model = enhanced_train_agent(
        exchange=exchange,
        num_episodes=NUM_EPISODES,
        continuous=CONTINUOUS_MODE,
        start_episode=CONTINUOUS_START_EPISODE
    )
    print("Training complete!")
 if __name__ == "__main__":
    main() 
--- a/crypto/gogo2/exchange_simulator.py
+++ b/crypto/gogo2/exchange_simulator.py
@@ -0,0 +1,373 @@
 import numpy as np
 import pandas as pd
 import os
 import random
 from datetime import datetime, timedelta
 class ExchangeSimulator:
    """
    A simple exchange simulator that generates realistic market data
    for testing trading algorithms without connecting to a real exchange.
    """
    def __init__(self, symbol="BTC/USDT", seed=42):
        """
        Initialize the exchange simulator
        Args:
            symbol: Trading pair symbol
            seed: Random seed for reproducibility
        """
        self.symbol = symbol
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)
        # Initialize data storage
        self.data = {}
        self.current_timestamp = datetime.now()
        # Generate initial data for different timeframes
        self.timeframes = ['1m', '5m', '15m', '30m', '1h', '4h', '1d']
        self.timeframe_minutes = {
            '1m': 1,
            '5m': 5,
            '15m': 15,
            '30m': 30,
            '1h': 60,
            '4h': 240,
            '1d': 1440
        }
        # Generate initial price around $50,000 (for BTC/USDT)
        self.base_price = 50000.0
        # Generate data for each timeframe
        for tf in self.timeframes:
            self._generate_initial_data(tf)
    def _generate_initial_data(self, timeframe, num_candles=1000):
        """
        Generate initial historical data for a specific timeframe
        Args:
            timeframe: Timeframe to generate data for
            num_candles: Number of candles to generate
        """
        # Calculate time delta for this timeframe
        minutes = self.timeframe_minutes[timeframe]
        # Generate timestamps
        end_time = self.current_timestamp
        timestamps = [end_time - timedelta(minutes=minutes * i) for i in range(num_candles)]
        timestamps.reverse()  # Oldest first
        # Generate price data with realistic patterns
        prices = self._generate_price_series(num_candles)
        # Generate volume data with realistic patterns
        volumes = self._generate_volume_series(num_candles, timeframe)
        # Create OHLCV data
        ohlcv_data = []
        for i in range(num_candles):
            # Calculate OHLC based on close price
            close = prices[i]
            high = close * (1 + np.random.uniform(0, 0.01))
            low = close * (1 - np.random.uniform(0, 0.01))
            open_price = prices[i-1] if i > 0 else close * (1 - np.random.uniform(-0.005, 0.005))
            # Create candle
            candle = [
                int(timestamps[i].timestamp() * 1000),  # Timestamp in milliseconds
                open_price,  # Open
                high,        # High
                low,         # Low
                close,       # Close
                volumes[i]   # Volume
            ]
            ohlcv_data.append(candle)
        # Store data
        self.data[timeframe] = ohlcv_data
    def _generate_price_series(self, length):
        """
        Generate a realistic price series with trends, reversals, and volatility
        Args:
            length: Number of prices to generate
        Returns:
            List of prices
        """
        # Start with base price
        prices = [self.base_price]
        # Parameters for price generation
        trend_strength = 0.001  # Strength of trend
        volatility = 0.005      # Daily volatility
        mean_reversion = 0.001  # Mean reversion strength
        # Generate price series
        for i in range(1, length):
            # Determine if we're in a trend
            if i % 100 == 0:
                # Change trend direction every ~100 candles
                trend_strength = -trend_strength
            # Calculate price change
            trend = trend_strength * prices[-1]
            random_change = np.random.normal(0, volatility) * prices[-1]
            mean_reversion_change = mean_reversion * (self.base_price - prices[-1])
            # Calculate new price
            new_price = prices[-1] + trend + random_change + mean_reversion_change
            # Ensure price doesn't go negative
            new_price = max(new_price, prices[-1] * 0.9)
            prices.append(new_price)
        return prices
    def _generate_volume_series(self, length, timeframe):
        """
        Generate a realistic volume series with patterns
        Args:
            length: Number of volumes to generate
            timeframe: Timeframe for volume scaling
        Returns:
            List of volumes
        """
        # Base volume depends on timeframe
        base_volume = {
            '1m': 10,
            '5m': 50,
            '15m': 150,
            '30m': 300,
            '1h': 600,
            '4h': 2400,
            '1d': 10000
        }[timeframe]
        # Generate volume series
        volumes = []
        for i in range(length):
            # Volume tends to be higher at trend reversals and during volatile periods
            cycle_factor = 1 + 0.5 * np.sin(i / 20)  # Cyclical pattern
            random_factor = np.random.lognormal(0, 0.5)  # Random spikes
            # Calculate volume
            volume = base_volume * cycle_factor * random_factor
            # Add some volume spikes
            if random.random() < 0.05:  # 5% chance of volume spike
                volume *= random.uniform(2, 5)
            volumes.append(volume)
        return volumes
    def fetch_ohlcv(self, timeframe='1m', limit=100, since=None):
        """
        Fetch OHLCV data for a specific timeframe
        Args:
            timeframe: Timeframe to fetch data for
            limit: Number of candles to fetch
            since: Timestamp to fetch data since (not used in simulator)
        Returns:
            List of OHLCV candles
        """
        # Ensure timeframe exists
        if timeframe not in self.data:
            if timeframe in self.timeframe_minutes:
                self._generate_initial_data(timeframe)
            else:
                # Default to 1m if timeframe not supported
                timeframe = '1m'
        # Get data
        data = self.data[timeframe]
        # Return limited data
        return data[-limit:]
    def update(self):
        """
        Update the exchange data by generating a new candle for each timeframe
        """
        # Update current timestamp
        self.current_timestamp = datetime.now()
        # Update each timeframe
        for tf in self.timeframes:
            self._add_new_candle(tf)
    def _add_new_candle(self, timeframe):
        """
        Add a new candle to the specified timeframe
        Args:
            timeframe: Timeframe to add candle to
        """
        # Get existing data
        data = self.data[timeframe]
        # Get last close price
        last_close = data[-1][4]
        # Calculate time delta for this timeframe
        minutes = self.timeframe_minutes[timeframe]
        # Calculate new timestamp
        new_timestamp = int((data[-1][0] / 1000 + minutes * 60) * 1000)
        # Generate new price with some randomness
        price_change = np.random.normal(0, 0.002) * last_close
        new_close = last_close + price_change
        # Calculate OHLC
        new_open = last_close
        new_high = max(new_open, new_close) * (1 + np.random.uniform(0, 0.005))
        new_low = min(new_open, new_close) * (1 - np.random.uniform(0, 0.005))
        # Generate volume
        base_volume = data[-1][5]
        volume_change = np.random.normal(0, 0.2) * base_volume
        new_volume = max(base_volume + volume_change, base_volume * 0.5)
        # Create new candle
        new_candle = [
            new_timestamp,
            new_open,
            new_high,
            new_low,
            new_close,
            new_volume
        ]
        # Add to data
        self.data[timeframe].append(new_candle)
    def get_ticker(self, symbol=None):
        """
        Get current ticker information
        Args:
            symbol: Symbol to get ticker for (defaults to initialized symbol)
        Returns:
            Dictionary with ticker information
        """
        if symbol is None:
            symbol = self.symbol
        # Get latest 1m candle
        latest_candle = self.data['1m'][-1]
        return {
            'symbol': symbol,
            'bid': latest_candle[4] * 0.9999,  # Slightly below last price
            'ask': latest_candle[4] * 1.0001,  # Slightly above last price
            'last': latest_candle[4],
            'high': latest_candle[2],
            'low': latest_candle[3],
            'volume': latest_candle[5],
            'timestamp': latest_candle[0]
        }
    def create_order(self, symbol, type, side, amount, price=None):
        """
        Simulate creating an order
        Args:
            symbol: Symbol to create order for
            type: Order type (limit, market)
            side: Order side (buy, sell)
            amount: Order amount
            price: Order price (for limit orders)
        Returns:
            Dictionary with order information
        """
        # Get current ticker
        ticker = self.get_ticker(symbol)
        # Determine execution price
        if type == 'market':
            if side == 'buy':
                execution_price = ticker['ask']
            else:
                execution_price = ticker['bid']
        else:  # limit order
            execution_price = price
        # Create order object
        order = {
            'id': f"order_{int(datetime.now().timestamp() * 1000)}",
            'symbol': symbol,
            'type': type,
            'side': side,
            'amount': amount,
            'price': execution_price,
            'cost': amount * execution_price,
            'filled': amount,
            'status': 'closed',
            'timestamp': int(datetime.now().timestamp() * 1000)
        }
        return order
    def fetch_balance(self):
        """
        Fetch account balance (simulated)
        Returns:
            Dictionary with balance information
        """
        return {
            'total': {
                'USD': 10000.0,
                'BTC': 1.0
            },
            'free': {
                'USD': 5000.0,
                'BTC': 0.5
            },
            'used': {
                'USD': 5000.0,
                'BTC': 0.5
            }
        }
 # Example usage
 if __name__ == "__main__":
    # Create exchange simulator
    exchange = ExchangeSimulator()
    # Fetch some data
    ohlcv = exchange.fetch_ohlcv(timeframe='1h', limit=10)
    print("OHLCV data (1h timeframe):")
    for candle in ohlcv[-5:]:
        timestamp = datetime.fromtimestamp(candle[0] / 1000)
        print(f"{timestamp}: Open={candle[1]:.2f}, High={candle[2]:.2f}, Low={candle[3]:.2f}, Close={candle[4]:.2f}, Volume={candle[5]:.2f}")
    # Get current ticker
    ticker = exchange.get_ticker()
    print(f"\nCurrent ticker: {ticker['last']:.2f}")
    # Create a market buy order
    order = exchange.create_order("BTC/USDT", "market", "buy", 0.1)
    print(f"\nCreated order: {order}")
    # Update the exchange (simulate time passing)
    exchange.update()
    # Get updated ticker
    updated_ticker = exchange.get_ticker()
    print(f"\nUpdated ticker: {updated_ticker['last']:.2f}") 
--- a/crypto/gogo2/main.py
+++ b/crypto/gogo2/main.py
--- a/crypto/gogo2/run_enhanced_training.py
+++ b/crypto/gogo2/run_enhanced_training.py
@@ -0,0 +1,305 @@
 import argparse
 import os
 import torch
 from enhanced_training import enhanced_train_agent
 from exchange_simulator import ExchangeSimulator
 def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Enhanced Trading Bot Training')
    parser.add_argument('--mode', type=str, default='train', choices=['train', 'continuous', 'evaluate', 'live', 'demo'],
                        help='Mode to run the trading bot in')
    parser.add_argument('--episodes', type=int, default=100,
                        help='Number of episodes to train for')
    parser.add_argument('--start-episode', type=int, default=0,
                        help='Episode to start from for continuous training')
    parser.add_argument('--device', type=str, default='auto',
                        help='Device to train on (auto, cuda, cpu)')
    parser.add_argument('--timeframes', type=str, default='1m,15m,1h',
                        help='Comma-separated list of timeframes to use')
    parser.add_argument('--refresh-data', action='store_true',
                        help='Refresh data before training')
    args = parser.parse_args()
    # Set device
    if args.device == 'auto':
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(args.device)
    print(f"Using device: {device}")
    # Parse timeframes
    timeframes = args.timeframes.split(',')
    print(f"Using timeframes: {timeframes}")
    # Initialize exchange simulator
    exchange = ExchangeSimulator()
    # Run in specified mode
    if args.mode == 'train':
        # Train from scratch
        print(f"Training for {args.episodes} episodes...")
        enhanced_train_agent(
            exchange=exchange,
            num_episodes=args.episodes,
            continuous=False,
            start_episode=0
        )
    elif args.mode == 'continuous':
        # Continue training from checkpoint
        print(f"Continuing training from episode {args.start_episode} for {args.episodes} episodes...")
        enhanced_train_agent(
            exchange=exchange,
            num_episodes=args.episodes,
            continuous=True,
            start_episode=args.start_episode
        )
    elif args.mode == 'evaluate':
        # Evaluate the model
        print("Evaluating model...")
        evaluate_model(exchange, device)
    elif args.mode == 'live' or args.mode == 'demo':
        # Run in live or demo mode
        is_demo = args.mode == 'demo'
        print(f"Running in {'demo' if is_demo else 'live'} mode...")
        run_live(exchange, device, is_demo=is_demo)
    print("Done!")
 def evaluate_model(exchange, device):
    """
    Evaluate the trained model
    Args:
        exchange: Exchange simulator
        device: Device to run on
    """
    from enhanced_models import EnhancedPricePredictionModel, EnhancedDQN
    import torch
    import numpy as np
    # Load the best model
    model_path = 'models/enhanced_trading_agent_best_pnl.pt'
    if not os.path.exists(model_path):
        model_path = 'models/enhanced_trading_agent_latest.pt'
    if not os.path.exists(model_path):
        print("No model found to evaluate!")
        return
    print(f"Loading model from {model_path}")
    checkpoint = torch.load(model_path, map_location=device)
    # Initialize models
    state_dim = 100
    action_dim = 3
    timeframes = ['1m', '15m', '1h']
    price_model = EnhancedPricePredictionModel(
        input_dim=2,
        hidden_dim=256,
        num_layers=3,
        output_dim=5,
        num_timeframes=len(timeframes)
    ).to(device)
    dqn_model = EnhancedDQN(
        state_dim=state_dim,
        action_dim=action_dim,
        hidden_dim=512
    ).to(device)
    # Load model weights
    price_model.load_state_dict(checkpoint['price_model_state_dict'])
    dqn_model.load_state_dict(checkpoint['dqn_model_state_dict'])
    # Set models to evaluation mode
    price_model.eval()
    dqn_model.eval()
    # Run evaluation
    num_steps = 1000
    total_reward = 0
    trades = []
    # Initialize state
    from enhanced_training import initialize_state, step_environment
    state = initialize_state(exchange, timeframes)
    for step in range(num_steps):
        # Select action
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            q_values, _, _ = dqn_model(state_tensor)
            action = q_values.argmax().item()
        # Execute action
        next_state, reward, done, trade_info = step_environment(
            exchange, state, action, price_model, timeframes, device
        )
        # Update state and accumulate reward
        state = next_state
        total_reward += reward
        # Track trade
        if trade_info is not None:
            trades.append(trade_info)
            print(f"Trade: {trade_info['action']} at {trade_info['price']:.2f}, PnL: {trade_info['pnl']:.2f}")
        # Update exchange (simulate time passing)
        if step % 10 == 0:
            exchange.update()
        if done:
            break
    # Calculate metrics
    avg_reward = total_reward / num_steps
    total_pnl = sum(trade['pnl'] for trade in trades) if trades else 0
    wins = sum(1 for trade in trades if trade['pnl'] > 0)
    losses = sum(1 for trade in trades if trade['pnl'] < 0)
    win_rate = (wins / (wins + losses) * 100) if (wins + losses) > 0 else 0
    print("\nEvaluation Results:")
    print(f"Average Reward: {avg_reward:.2f}")
    print(f"Total PnL: ${total_pnl:.2f}")
    print(f"Win Rate: {win_rate:.1f}% ({wins}/{wins+losses})")
 def run_live(exchange, device, is_demo=True):
    """
    Run the trading bot in live or demo mode
    Args:
        exchange: Exchange simulator or real exchange
        device: Device to run on
        is_demo: Whether to run in demo mode (no real trades)
    """
    from enhanced_models import EnhancedPricePredictionModel, EnhancedDQN
    import torch
    import time
    # Load the best model
    model_path = 'models/enhanced_trading_agent_best_pnl.pt'
    if not os.path.exists(model_path):
        model_path = 'models/enhanced_trading_agent_latest.pt'
    if not os.path.exists(model_path):
        print("No model found to run in live mode!")
        return
    print(f"Loading model from {model_path}")
    checkpoint = torch.load(model_path, map_location=device)
    # Initialize models
    state_dim = 100
    action_dim = 3
    timeframes = ['1m', '15m', '1h']
    price_model = EnhancedPricePredictionModel(
        input_dim=2,
        hidden_dim=256,
        num_layers=3,
        output_dim=5,
        num_timeframes=len(timeframes)
    ).to(device)
    dqn_model = EnhancedDQN(
        state_dim=state_dim,
        action_dim=action_dim,
        hidden_dim=512
    ).to(device)
    # Load model weights
    price_model.load_state_dict(checkpoint['price_model_state_dict'])
    dqn_model.load_state_dict(checkpoint['dqn_model_state_dict'])
    # Set models to evaluation mode
    price_model.eval()
    dqn_model.eval()
    # Run live trading
    print(f"Running in {'demo' if is_demo else 'live'} mode...")
    print("Press Ctrl+C to stop")
    # Initialize state
    from enhanced_training import initialize_state, step_environment
    state = initialize_state(exchange, timeframes)
    try:
        while True:
            # Select action
            with torch.no_grad():
                state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
                q_values, _, market_regime = dqn_model(state_tensor)
                action = q_values.argmax().item()
                # Get market regime prediction
                regime_probs = torch.softmax(market_regime, dim=1).cpu().numpy()[0]
                regime_names = ['Trending', 'Ranging', 'Volatile']
                predicted_regime = regime_names[regime_probs.argmax()]
            # Get current price
            ticker = exchange.get_ticker()
            current_price = ticker['last']
            # Print state
            print(f"\nCurrent price: ${current_price:.2f}")
            print(f"Predicted market regime: {predicted_regime} ({regime_probs.max()*100:.1f}% confidence)")
            # Execute action
            next_state, reward, _, trade_info = step_environment(
                exchange, state, action, price_model, timeframes, device
            )
            # Print action
            action_names = ['Hold', 'Buy', 'Sell']
            print(f"Action: {action_names[action]}")
            if trade_info is not None:
                print(f"Trade: {trade_info['action']} at {trade_info['price']:.2f}, Size: {trade_info['size']:.2f}, Entry Quality: {trade_info['entry_quality']:.2f}")
                # Execute real trade if not in demo mode
                if not is_demo:
                    if trade_info['action'] == 'buy':
                        order = exchange.create_order(
                            symbol="BTC/USDT",
                            type="market",
                            side="buy",
                            amount=trade_info['size'] / current_price
                        )
                        print(f"Executed buy order: {order}")
                    else:  # sell
                        order = exchange.create_order(
                            symbol="BTC/USDT",
                            type="market",
                            side="sell",
                            amount=trade_info['size'] / current_price
                        )
                        print(f"Executed sell order: {order}")
            # Update state
            state = next_state
            # Update exchange (simulate time passing)
            exchange.update()
            # Wait for next candle
            time.sleep(5)  # In a real implementation, this would wait for the next candle
    except KeyboardInterrupt:
        print("\nStopping live trading")
 if __name__ == "__main__":
    main() 
--- a/crypto/gogo2/test_cache.py
+++ b/crypto/gogo2/test_cache.py
@@ -0,0 +1,185 @@
 import os
 import sys
 import json
 import logging
 import time
 from datetime import datetime
 # Configure logging
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
 )
 logger = logging.getLogger('cache_test')
 # Import our cache implementation
 from data_cache import ohlcv_cache
 def generate_sample_data(num_candles=100):
    """Generate sample OHLCV data for testing"""
    data = []
    base_timestamp = int(time.time() * 1000) - (num_candles * 60 * 1000)  # Start from num_candles minutes ago
    for i in range(num_candles):
        timestamp = base_timestamp + (i * 60 * 1000)  # Add i minutes
        # Generate some random-ish but realistic looking price data
        base_price = 1900.0 + (i * 0.5)  # Slight uptrend
        open_price = base_price - 0.5 + (i % 3)
        close_price = base_price + 0.3 + ((i+1) % 4)
        high_price = max(open_price, close_price) + 1.0 + (i % 2)
        low_price = min(open_price, close_price) - 0.8 - (i % 2)
        volume = 10.0 + (i % 10) * 2.0
        data.append({
            'timestamp': timestamp,
            'open': open_price,
            'high': high_price,
            'low': low_price,
            'close': close_price,
            'volume': volume
        })
    return data
 def test_cache_save_load():
    """Test saving and loading data from cache"""
    logger.info("Testing cache save and load...")
    # Generate sample data
    data = generate_sample_data(100)
    logger.info(f"Generated {len(data)} sample candles")
    # Save to cache
    symbol = "ETH/USDT"
    timeframe = "1m"
    success = ohlcv_cache.save(data, symbol, timeframe)
    logger.info(f"Saved to cache: {success}")
    # Load from cache
    cached_data = ohlcv_cache.load(symbol, timeframe)
    logger.info(f"Loaded {len(cached_data) if cached_data else 0} candles from cache")
    # Verify data integrity
    if cached_data:
        first_original = data[0]
        first_cached = cached_data[0]
        logger.info(f"First original candle: {first_original}")
        logger.info(f"First cached candle: {first_cached}")
        last_original = data[-1]
        last_cached = cached_data[-1]
        logger.info(f"Last original candle: {last_original}")
        logger.info(f"Last cached candle: {last_cached}")
    return success and cached_data and len(cached_data) == len(data)
 def test_cache_append():
    """Test appending a new candle to cached data"""
    logger.info("Testing cache append...")
    # Generate sample data
    data = generate_sample_data(100)
    # Save to cache
    symbol = "ETH/USDT"
    timeframe = "5m"
    success = ohlcv_cache.save(data, symbol, timeframe)
    logger.info(f"Saved to cache: {success}")
    # Generate a new candle
    last_timestamp = data[-1]['timestamp']
    new_timestamp = last_timestamp + (5 * 60 * 1000)  # 5 minutes later
    new_candle = {
        'timestamp': new_timestamp,
        'open': 1950.0,
        'high': 1955.0,
        'low': 1948.0,
        'close': 1952.0,
        'volume': 15.0
    }
    # Append to cache
    success = ohlcv_cache.append(new_candle, symbol, timeframe)
    logger.info(f"Appended to cache: {success}")
    # Load from cache
    cached_data = ohlcv_cache.load(symbol, timeframe)
    logger.info(f"Loaded {len(cached_data) if cached_data else 0} candles from cache")
    # Verify the new candle was appended
    if cached_data:
        last_cached = cached_data[-1]
        logger.info(f"New candle: {new_candle}")
        logger.info(f"Last cached candle: {last_cached}")
    return success and cached_data and len(cached_data) == len(data) + 1
 def test_cache_dataframe():
    """Test converting cached data to a pandas DataFrame"""
    logger.info("Testing cache to DataFrame conversion...")
    # Generate sample data
    data = generate_sample_data(100)
    # Save to cache
    symbol = "ETH/USDT"
    timeframe = "15m"
    success = ohlcv_cache.save(data, symbol, timeframe)
    logger.info(f"Saved to cache: {success}")
    # Convert to DataFrame
    df = ohlcv_cache.to_dataframe(symbol, timeframe)
    logger.info(f"Converted to DataFrame with {len(df) if df is not None else 0} rows")
    # Display DataFrame info
    if df is not None:
        logger.info(f"DataFrame columns: {df.columns.tolist()}")
        logger.info(f"DataFrame index: {df.index.name}")
        logger.info(f"First row: {df.iloc[0].to_dict()}")
        logger.info(f"Last row: {df.iloc[-1].to_dict()}")
    return success and df is not None and len(df) == len(data)
 def main():
    """Run all tests"""
    logger.info("Starting cache tests...")
    # Run tests
    save_load_success = test_cache_save_load()
    append_success = test_cache_append()
    dataframe_success = test_cache_dataframe()
    # Print results
    logger.info("Test results:")
    logger.info(f"  Save/Load: {'PASS' if save_load_success else 'FAIL'}")
    logger.info(f"  Append: {'PASS' if append_success else 'FAIL'}")
    logger.info(f"  DataFrame: {'PASS' if dataframe_success else 'FAIL'}")
    # Check cache directory contents
    cache_dir = ohlcv_cache.cache_dir
    logger.info(f"Cache directory: {cache_dir}")
    if os.path.exists(cache_dir):
        files = os.listdir(cache_dir)
        logger.info(f"Cache files: {files}")
        # Print file sizes
        for file in files:
            file_path = os.path.join(cache_dir, file)
            size_kb = os.path.getsize(file_path) / 1024
            logger.info(f"  {file}: {size_kb:.2f} KB")
            # Print first few lines of each file
            with open(file_path, 'r') as f:
                data = json.load(f)
                logger.info(f"  Metadata: symbol={data.get('symbol')}, timeframe={data.get('timeframe')}, last_updated={datetime.fromtimestamp(data.get('last_updated')).strftime('%Y-%m-%d %H:%M:%S')}")
                logger.info(f"  Candles: {len(data.get('data', []))}")
    return save_load_success and append_success and dataframe_success
 if __name__ == "__main__":
    main()
`@@ -1,3 +1,5 @@`
		`pip install torch-tb-profiler`



	`ensure we use GPU if available to train faster. during training we need to have RL loop that looks at streaming data, and retrospective backtesting/training on predictions. sincr the start of the traing we're only loosing. implement robust penalty and analysis when closing a loosing trade and improve the reward function.`	`ensure we use GPU if available to train faster. during training we need to have RL loop that looks at streaming data, and retrospective backtesting/training on predictions. sincr the start of the traing we're only loosing. implement robust penalty and analysis when closing a loosing trade and improve the reward function.`