gogo2/live_training.py
#!/usr/bin/env python
import asyncio
import logging
import sys
import platform
import argparse
import os
import datetime
import traceback
import numpy as np
import torch
import gc
from functools import partial
from main import initialize_exchange, TradingEnvironment, Agent
from torch.utils.tensorboard import SummaryWriter

# Fix for Windows asyncio issues with aiodns
if platform.system() == 'Windows':
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
        print("Using Windows SelectorEventLoopPolicy to fix aiodns issue")
    except Exception as e:
        print(f"Failed to set WindowsSelectorEventLoopPolicy: {e}")

# Set up logging
def setup_logging():
    """Set up logging configuration for the application."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler("live_training.log"),
            logging.StreamHandler(sys.stdout)  # stdout handler for immediate feedback
        ]
    )


setup_logging()
logger = logging.getLogger(__name__)

# Robust save function to handle PyTorch serialization errors
def robust_save(model, path):
    """
    Robust model saving with multiple fallback approaches.

    Args:
        model: The Agent model to save
        path: Path to save the model

    Returns:
        bool: True if successful, False otherwise
    """
    # Create directory if it doesn't exist
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

    # Backup path in case the main save fails
    backup_path = f"{path}.backup"

    # Clean up GPU memory before saving
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

    # Attempt 1: default settings, written to a separate backup file first
    try:
        logger.info(f"Saving model to {backup_path} (attempt 1)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'optimizer': model.optimizer.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, backup_path)
        logger.info(f"Successfully saved to {backup_path}")

        # If the backup worked, copy it to the actual path
        if os.path.exists(backup_path):
            import shutil
            shutil.copy(backup_path, path)
            logger.info(f"Copied backup to {path}")
        return True
    except Exception as e:
        logger.warning(f"First save attempt failed: {e}")

    # Attempt 2: pickle protocol 2 (more compatible)
    try:
        logger.info(f"Saving model to {path} (attempt 2 - pickle protocol 2)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'optimizer': model.optimizer.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, path, pickle_protocol=2)
        logger.info(f"Successfully saved to {path} with pickle_protocol=2")
        return True
    except Exception as e:
        logger.warning(f"Second save attempt failed: {e}")

    # Attempt 3: without the optimizer state (which can be large and cause issues)
    try:
        logger.info(f"Saving model to {path} (attempt 3 - without optimizer)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, path)
        logger.info(f"Successfully saved to {path} without optimizer state")
        return True
    except Exception as e:
        logger.warning(f"Third save attempt failed: {e}")

    # Attempt 4: torch.jit.save as a last resort
    try:
        logger.info(f"Saving model to {path} (attempt 4 - with jit.save)")

        # Save policy network using TorchScript
        scripted_policy = torch.jit.script(model.policy_net)
        torch.jit.save(scripted_policy, f"{path}.policy.jit")

        # Save target network using TorchScript
        scripted_target = torch.jit.script(model.target_net)
        torch.jit.save(scripted_target, f"{path}.target.jit")

        # Save epsilon value separately
        with open(f"{path}.epsilon.txt", "w") as f:
            f.write(str(model.epsilon))
        logger.info("Successfully saved model components with jit.save")
        return True
    except Exception as e:
        logger.error(f"All save attempts failed: {e}")
        return False
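
# Note (illustrative sketch, not part of the original training flow): the jit
# fallback above writes "<path>.policy.jit", "<path>.target.jit" and
# "<path>.epsilon.txt" instead of a single checkpoint file, so Agent.load()
# cannot read it back directly. A hypothetical loader for those artifacts,
# assuming the Agent exposes policy_net/target_net/epsilon, could look like:
#
#     def load_jit_fallback(model, path):
#         model.policy_net = torch.jit.load(f"{path}.policy.jit")
#         model.target_net = torch.jit.load(f"{path}.target.jit")
#         with open(f"{path}.epsilon.txt") as f:
#             model.epsilon = float(f.read())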

# Timeout wrapper for exchange operations
async def with_timeout(coroutine, timeout=30, default=None):
    """
    Execute a coroutine with a timeout.

    Args:
        coroutine: The coroutine to execute
        timeout: Timeout in seconds
        default: Default value to return on timeout

    Returns:
        The result of the coroutine, or the default value on timeout or error
    """
    try:
        return await asyncio.wait_for(coroutine, timeout=timeout)
    except asyncio.TimeoutError:
        logger.warning(f"Operation timed out after {timeout} seconds")
        return default
    except Exception as e:
        logger.error(f"Operation failed: {e}")
        return default
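
# Example usage (illustrative): any slow exchange call can be wrapped so a hung
# request cannot stall the training loop, e.g.
#     ticker = await with_timeout(exchange.fetch_ticker(symbol), timeout=10, default=None)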

async def fetch_and_update_data(exchange, env, symbol, timeframe):
    """
    Fetch new candle data and update the environment.

    Args:
        exchange: CCXT exchange instance
        env: Trading environment instance
        symbol: Trading pair symbol
        timeframe: Timeframe for the candles

    Returns:
        bool: True if the environment was updated, False otherwise
    """
    logger.info(f"Fetching new data for {symbol} on {timeframe} timeframe")
    try:
        # Fetch up to 1000 candles per update
        limit = 1000

        # Fetch OHLCV data with timeout
        candles = await with_timeout(
            exchange.fetch_ohlcv(symbol, timeframe, limit=limit),
            timeout=30,
            default=[]
        )

        if not candles or len(candles) == 0:
            logger.warning(f"No candles returned for {symbol} on {timeframe}")
            return False
        logger.info(f"Successfully fetched {len(candles)} candles")

        # Convert to the format expected by the environment
        formatted_candles = []
        for candle in candles:
            timestamp, open_price, high, low, close, volume = candle
            formatted_candles.append({
                'timestamp': timestamp,
                'open': open_price,
                'high': high,
                'low': low,
                'close': close,
                'volume': volume
            })

        # Update environment data
        env.data = formatted_candles
        if hasattr(env, '_initialize_features'):
            env._initialize_features()
        logger.info(f"Updated environment with {len(formatted_candles)} candles")

        # Log the latest candle (timestamps are in milliseconds)
        if formatted_candles:
            latest = formatted_candles[-1]
            dt = datetime.datetime.fromtimestamp(latest['timestamp'] / 1000).strftime('%Y-%m-%d %H:%M:%S')
            logger.info(f"Latest candle: Time={dt}, Open={latest['open']}, High={latest['high']}, Low={latest['low']}, Close={latest['close']}, Volume={latest['volume']}")
        return True
    except Exception as e:
        logger.error(f"Error fetching candle data: {e}")
        logger.error(traceback.format_exc())
        return False

def manage_memory():
    """Clean up memory to avoid leaks during long-running sessions."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    logger.debug("Memory cleaned")

async def live_training(
    symbol="ETH/USDT",
    timeframe="1m",
    model_path="models/trading_agent_best_pnl.pt",
    save_path="models/trading_agent_live_trained.pt",
    initial_balance=1000,
    update_interval=60,
    training_iterations=100,
    learning_rate=0.0001,
    batch_size=64,
    gamma=0.99,
    window_size=30,
    max_episodes=0,   # 0 means unlimited
    retry_delay=5,    # Seconds to wait before retrying after an error
    max_retries=3,    # Maximum number of retries for operations
):
    """
    Live training function that uses real market data to improve the model
    without executing real trades.

    Args:
        symbol: Trading pair symbol
        timeframe: Timeframe for training
        model_path: Path to the initial model to load
        save_path: Path to save the improved model
        initial_balance: Initial balance for simulation
        update_interval: Interval to update data in seconds
        training_iterations: Number of training iterations per data update
        learning_rate: Learning rate for training
        batch_size: Batch size for training
        gamma: Discount factor for training
        window_size: Window size for the environment
        max_episodes: Maximum number of episodes (0 for unlimited)
        retry_delay: Seconds to wait before retrying after an error
        max_retries: Maximum number of retries for operations
    """
    logger.info(f"Starting live training for {symbol} on {timeframe} timeframe")

    # Initialize exchange (without sandbox mode), with retries
    exchange = None
    for retry in range(max_retries):
        try:
            exchange = await initialize_exchange()
            logger.info(f"Exchange initialized: {exchange.id}")
            break
        except Exception as e:
            logger.error(f"Error initializing exchange (attempt {retry+1}/{max_retries}): {e}")
            if retry < max_retries - 1:
                logger.info(f"Retrying in {retry_delay} seconds...")
                await asyncio.sleep(retry_delay)
            else:
                logger.error("Max retries reached. Could not initialize exchange.")
                return

    try:
        # Initialize environment
        env = TradingEnvironment(
            initial_balance=initial_balance,
            window_size=window_size,
            symbol=symbol,
            timeframe=timeframe,
        )

        # Fetch initial data (with retries)
        logger.info(f"Fetching initial data for {symbol}")
        success = False
        for retry in range(max_retries):
            success = await fetch_and_update_data(exchange, env, symbol, timeframe)
            if success:
                break
            logger.warning(f"Failed to fetch initial data (attempt {retry+1}/{max_retries})")
            if retry < max_retries - 1:
                logger.info(f"Retrying in {retry_delay} seconds...")
                await asyncio.sleep(retry_delay)

        if not success:
            logger.error("Failed to fetch initial data after multiple attempts, exiting")
            return

        # Initialize agent
        STATE_SIZE = env.get_state().shape[0] if hasattr(env, 'get_state') else 64
        ACTION_SIZE = env.action_space.n if hasattr(env.action_space, 'n') else 4
        agent = Agent(state_size=STATE_SIZE, action_size=ACTION_SIZE, hidden_size=384)

        # Load model if provided
        if os.path.exists(model_path):
            try:
                agent.load(model_path)
                logger.info(f"Model loaded successfully from {model_path}")
            except Exception as e:
                logger.warning(f"Error loading model: {e}")
                logger.info("Starting with a new model")
        else:
            logger.warning(f"Model file {model_path} not found. Starting with a new model.")

        # Initialize TensorBoard writer
        run_id = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        writer = SummaryWriter(log_dir=f"runs/live_training_{run_id}")
        agent.writer = writer

        # Initialize training statistics
        total_rewards = 0
        episode_count = 0
        best_reward = float('-inf')
        best_pnl = float('-inf')

        # Start live training loop
        logger.info("Starting live training loop")
        step_counter = 0
        last_update_time = datetime.datetime.now()

        # Track consecutive errors to enable a circuit breaker
        consecutive_errors = 0
        max_consecutive_errors = 5

        while True:
            # Check if we've reached the maximum number of episodes
            if max_episodes > 0 and episode_count >= max_episodes:
                logger.info(f"Reached maximum episodes ({max_episodes}), stopping")
                break

            # Check if it's time to update data
            current_time = datetime.datetime.now()
            time_diff = (current_time - last_update_time).total_seconds()
            if time_diff >= update_interval:
                logger.info(f"Updating market data after {time_diff:.1f} seconds")
                success = await fetch_and_update_data(exchange, env, symbol, timeframe)
                if not success:
                    logger.warning("Failed to update data, will try again later")
                    # Wait a bit before trying again
                    await asyncio.sleep(retry_delay)
                    continue
                last_update_time = current_time

            # Clean up memory before running an episode
            manage_memory()

            # Run training iterations on the updated data
            episode_reward = 0
            env.reset()
            done = False

            # Run one simulated episode with the current data
            steps_in_episode = 0
            max_steps = len(env.data) - env.window_size - 1
            logger.info(f"Starting episode {episode_count + 1} with {max_steps} steps")

            while not done and steps_in_episode < max_steps:
                try:
                    state = env.get_state()
                    action = agent.select_action(state, training=True)

                    try:
                        next_state, reward, done, info = env.step(action)
                    except ValueError as e:
                        logger.error(f"Error during env.step: {e}")
                        # env.step may return 3 values instead of 4; handle that case
                        if "too many values to unpack" in str(e):
                            logger.info("Trying alternative step format")
                            result = env.step(action)
                            if len(result) == 3:
                                next_state, reward, done = result
                                info = {}
                            else:
                                raise
                        else:
                            raise

                    # Save experience in replay memory
                    agent.memory.push(state, action, reward, next_state, done)

                    # Move to the next state
                    state = next_state
                    episode_reward += reward
                    step_counter += 1
                    steps_in_episode += 1

                    # Log action and results every 50 steps
                    if steps_in_episode % 50 == 0:
                        logger.info(f"Step {steps_in_episode}/{max_steps} | Action: {action} | Reward: {reward:.2f} | Balance: ${env.balance:.2f}")

                    # Train the agent on a batch of experiences
                    if len(agent.memory) > batch_size:
                        try:
                            agent.learn()
                            # Additional training iterations
                            if steps_in_episode % 10 == 0 and training_iterations > 1:
                                for _ in range(training_iterations - 1):
                                    agent.learn()
                            # Reset the consecutive-errors counter on successful learning
                            consecutive_errors = 0
                        except Exception as e:
                            logger.error(f"Error during learning: {e}")
                            consecutive_errors += 1
                            if consecutive_errors >= max_consecutive_errors:
                                logger.warning(f"Circuit breaker triggered after {max_consecutive_errors} consecutive errors")
                                break

                    if done:
                        logger.info(f"Episode done after {steps_in_episode} steps")
                        break
                except Exception as e:
                    logger.error(f"Error during episode step: {e}")
                    logger.error(traceback.format_exc())
                    consecutive_errors += 1
                    if consecutive_errors >= max_consecutive_errors:
                        logger.warning(f"Circuit breaker triggered after {max_consecutive_errors} consecutive errors")
                        break

            # Update training statistics
            episode_count += 1
            total_rewards += episode_reward
            avg_reward = total_rewards / episode_count

            # Track metrics in TensorBoard
            writer.add_scalar('LiveTraining/Reward', episode_reward, episode_count)
            writer.add_scalar('LiveTraining/AvgReward', avg_reward, episode_count)
            writer.add_scalar('LiveTraining/Balance', env.balance, episode_count)
            writer.add_scalar('LiveTraining/PnL', env.total_pnl, episode_count)

            # Report progress
            logger.info(f"""
            Episode: {episode_count}
            Reward: {episode_reward:.2f}
            Avg Reward: {avg_reward:.2f}
            Balance: ${env.balance:.2f}
            PnL: ${env.total_pnl:.2f}
            Memory Size: {len(agent.memory)}
            Total Steps: {step_counter}
            """)

            # Save the model if it's the best so far (by reward or PnL)
            if episode_reward > best_reward:
                best_reward = episode_reward
                reward_model_path = f"models/trading_agent_best_reward_{run_id}.pt"
                if robust_save(agent, reward_model_path):
                    logger.info(f"New best reward model saved: {episode_reward:.2f} to {reward_model_path}")
                else:
                    logger.error("Failed to save best reward model")

            if env.total_pnl > best_pnl:
                best_pnl = env.total_pnl
                pnl_model_path = f"models/trading_agent_best_pnl_{run_id}.pt"
                if robust_save(agent, pnl_model_path):
                    logger.info(f"New best PnL model saved: ${env.total_pnl:.2f} to {pnl_model_path}")
                else:
                    logger.error("Failed to save best PnL model")

            # Regularly save a checkpoint
            if episode_count % 5 == 0:
                if robust_save(agent, save_path):
                    logger.info(f"Model checkpoint saved to {save_path}")
                else:
                    logger.error("Failed to save checkpoint")

            # Update target network periodically
            if episode_count % 5 == 0:
                try:
                    agent.update_target_network()
                    logger.info("Target network updated")
                except Exception as e:
                    logger.error(f"Error updating target network: {e}")

            # Sleep to avoid excessive API calls
            await asyncio.sleep(1)

    except asyncio.CancelledError:
        logger.info("Live training cancelled")
    except KeyboardInterrupt:
        logger.info("Live training stopped by user")
    except Exception as e:
        logger.error(f"Error in live training: {e}")
        logger.error(traceback.format_exc())
    finally:
        # Save final model
        if 'agent' in locals():
            if robust_save(agent, save_path):
                logger.info(f"Final model saved to {save_path}")
            else:
                logger.error("Failed to save final model")

        # Close TensorBoard writer
        try:
            writer.close()
            logger.info("TensorBoard writer closed")
        except Exception as e:
            logger.error(f"Error closing TensorBoard writer: {e}")

        # Close exchange connection
        if exchange:
            try:
                await with_timeout(exchange.close(), timeout=10)
                logger.info("Exchange connection closed")
            except Exception as e:
                logger.error(f"Error closing exchange connection: {e}")

        # Final memory cleanup
        manage_memory()
        logger.info("Live training completed")

async def main():
    """Parse arguments and start live training."""
    parser = argparse.ArgumentParser(description='Live Training with Real Market Data')
    parser.add_argument('--symbol', type=str, default='ETH/USDT', help='Trading pair symbol')
    parser.add_argument('--timeframe', type=str, default='1m', help='Timeframe for training')
    parser.add_argument('--model_path', type=str, default='models/trading_agent_best_pnl.pt', help='Path to initial model')
    parser.add_argument('--save_path', type=str, default='models/trading_agent_live_trained.pt', help='Path to save improved model')
    parser.add_argument('--initial_balance', type=float, default=1000, help='Initial balance for simulation')
    parser.add_argument('--update_interval', type=int, default=60, help='Interval to update data in seconds')
    parser.add_argument('--training_iterations', type=int, default=100, help='Training iterations per update')
    parser.add_argument('--max_episodes', type=int, default=0, help='Maximum number of episodes (0 for unlimited)')
    parser.add_argument('--retry_delay', type=int, default=5, help='Seconds to wait before retrying after an error')
    parser.add_argument('--max_retries', type=int, default=3, help='Maximum number of retries for operations')
    args = parser.parse_args()

    logger.info(f"Starting live training with {args.symbol} on {args.timeframe} timeframe")

    await live_training(
        symbol=args.symbol,
        timeframe=args.timeframe,
        model_path=args.model_path,
        save_path=args.save_path,
        initial_balance=args.initial_balance,
        update_interval=args.update_interval,
        training_iterations=args.training_iterations,
        max_episodes=args.max_episodes,
        retry_delay=args.retry_delay,
        max_retries=args.max_retries,
    )

# Override Agent's save method with our robust save function
def monkey_patch_agent_save():
    """Replace Agent's save method with the robust_save approach."""
    original_save = Agent.save

    def patched_save(self, path):
        return robust_save(self, path)

    # Apply the patch
    Agent.save = patched_save
    logger.info("Monkey patched Agent.save with robust_save")

    # Return the original method in case we need to restore it
    return original_save
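
# Example (illustrative): the returned original method can be kept and restored
# later if the unpatched behaviour is needed again:
#     original_save = monkey_patch_agent_save()
#     # ... run training ...
#     Agent.save = original_save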

if __name__ == "__main__":
    try:
        print("Starting live training script")
        # Apply the monkey patch before running the main function
        original_save = monkey_patch_agent_save()
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Live training stopped by user")
    except Exception as e:
        logger.error(f"Error in main function: {e}")
        logger.error(traceback.format_exc())