better saves
This commit is contained in:
parent 2e7a242ac7
commit bdf6afc6ad
crypto/gogo2/.vscode/launch.json (vendored, 2 changes)
@@ -6,7 +6,7 @@
             "type": "python",
             "request": "launch",
             "program": "main.py",
-            "args": ["--mode", "train", "--episodes", "10"],
+            "args": ["--mode", "train", "--episodes", "100"],
             "console": "integratedTerminal",
             "justMyCode": true
         },

crypto/gogo2/MODEL_SAVING_FIX.md (new file, 74 lines)
@@ -0,0 +1,74 @@
# Model Saving Fix

## Issue

During training sessions, PyTorch model saving operations sometimes fail with errors like:

```
RuntimeError: [enforce fail at inline_container.cc:626] . unexpected pos 18278784 vs 18278680
```

or

```
RuntimeError: [enforce fail at inline_container.cc:820] . PytorchStreamWriter failed writing file data/75: file write failed
```

These errors occur inside PyTorch's serialization mechanism when saving models with `torch.save()`.

## Solution

We've implemented a robust model saving approach that falls back through multiple methods when the primary save operation fails (a condensed sketch follows the list):

1. **Attempt 1**: Save to a backup file first, then copy it to the target path.
2. **Attempt 2**: Save with an older pickle protocol (protocol 2), which is more broadly compatible.
3. **Attempt 3**: Save without the optimizer state, which reduces file size and can avoid serialization issues.
4. **Attempt 4**: Use TorchScript's `torch.jit.save()` instead of `torch.save()`, which uses a different serialization mechanism.
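
Each attempt is a try/except block that returns `True` on success and falls through to the next method on failure; a minimal sketch of the pattern (the full implementation is in `live_training.py`, shown below in this commit):

```python
import logging
import torch

logger = logging.getLogger(__name__)

def save_with_fallbacks(checkpoint, path):
    """Try progressively more conservative save strategies."""
    attempts = [
        lambda: torch.save(checkpoint, f"{path}.backup"),         # 1: write a backup copy first
        lambda: torch.save(checkpoint, path, pickle_protocol=2),  # 2: older pickle protocol
    ]
    for i, attempt in enumerate(attempts, start=1):
        try:
            attempt()
            return True
        except Exception as e:
            logger.warning(f"Save attempt {i} failed: {e}")
    return False
```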

## Implementation

The solution is implemented in two parts:

1. A `robust_save` function that tries multiple saving approaches with fallbacks.
2. A monkey patch that replaces the Agent's `save` method with our robust version (sketched below).
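
The patch itself amounts to one assignment; a minimal sketch of what `live_training.py` applies at startup:

```python
from main import Agent
from live_training import robust_save

# Route all Agent.save calls through the robust fallback chain
Agent.save = lambda self, path: robust_save(self, path)
```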

### Example Usage

```python
# Import the robust_save function
from live_training import robust_save

# Save a model with fallbacks
success = robust_save(agent, "models/my_model.pt")
if success:
    print("Model saved successfully!")
else:
    print("All save attempts failed")
```

## Testing

We've created a test script, `test_save.py`, that demonstrates the robust saving approach and verifies that it works correctly.

To run the test:

```bash
python test_save.py
```

This script creates a simple model, attempts to save it using both the standard and robust methods, and reports the results.

## Future Improvements

Possible future improvements to the model saving mechanism (sketches of items 2 and 4 follow the list):

1. Additional fallback methods, such as serializing individual neural network layers.
2. An automatic retry mechanism with exponential backoff.
3. Asynchronous saving to avoid blocking the training loop.
4. Checksumming saved models to verify integrity.
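
As an illustration of items 2 and 4, a minimal sketch; these helpers are hypothetical and not part of this commit:

```python
import hashlib
import time


def save_with_backoff(save_fn, path, retries=3, base_delay=1.0):
    """Retry a save callable with exponential backoff (improvement 2)."""
    for attempt in range(retries):
        try:
            save_fn(path)
            return True
        except Exception:
            time.sleep(base_delay * (2 ** attempt))  # 1s, 2s, 4s, ...
    return False


def sha256_of(path):
    """Checksum a saved model file so its integrity can be verified later (improvement 4)."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return h.hexdigest()
```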

## Related Issues

For more information on similar issues with PyTorch model saving, see:

- https://github.com/pytorch/pytorch/issues/27736
- https://github.com/pytorch/pytorch/issues/24045

crypto/gogo2/live_training.py (new file, 498 lines)
@@ -0,0 +1,498 @@
#!/usr/bin/env python
import asyncio
import logging
import sys
import platform
import argparse
import os
import datetime
import traceback
import numpy as np
import torch
from main import initialize_exchange, TradingEnvironment, Agent
from torch.utils.tensorboard import SummaryWriter

# Fix for Windows asyncio issues with aiodns
if platform.system() == 'Windows':
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
        print("Using Windows SelectorEventLoopPolicy to fix aiodns issue")
    except Exception as e:
        print(f"Failed to set WindowsSelectorEventLoopPolicy: {e}")

# Setup logging function
def setup_logging():
    """Setup logging configuration for the application"""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler("live_training.log"),
            logging.StreamHandler(sys.stdout)  # stdout handler for immediate feedback
        ]
    )

# Set up logging
setup_logging()
logger = logging.getLogger(__name__)

# Robust save function to handle PyTorch serialization errors
def robust_save(model, path):
    """
    Robust model saving with multiple fallback approaches

    Args:
        model: The Agent model to save
        path: Path to save the model

    Returns:
        bool: True if successful, False otherwise
    """
    # Create directory if it doesn't exist
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

    # Backup path in case the main save fails
    backup_path = f"{path}.backup"

    # Attempt 1: Try with default settings in a separate file first
    try:
        logger.info(f"Saving model to {backup_path} (attempt 1)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'optimizer': model.optimizer.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, backup_path)
        logger.info(f"Successfully saved to {backup_path}")

        # If the backup worked, copy it to the actual path
        if os.path.exists(backup_path):
            import shutil
            shutil.copy(backup_path, path)
            logger.info(f"Copied backup to {path}")
            return True
    except Exception as e:
        logger.warning(f"First save attempt failed: {e}")

    # Attempt 2: Try with pickle protocol 2 (more broadly compatible)
    try:
        logger.info(f"Saving model to {path} (attempt 2 - pickle protocol 2)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'optimizer': model.optimizer.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, path, pickle_protocol=2)
        logger.info(f"Successfully saved to {path} with pickle_protocol=2")
        return True
    except Exception as e:
        logger.warning(f"Second save attempt failed: {e}")

    # Attempt 3: Try without optimizer state (which can be large and cause issues)
    try:
        logger.info(f"Saving model to {path} (attempt 3 - without optimizer)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, path)
        logger.info(f"Successfully saved to {path} without optimizer state")
        return True
    except Exception as e:
        logger.warning(f"Third save attempt failed: {e}")

    # Attempt 4: Try with torch.jit.save instead
    try:
        logger.info(f"Saving model to {path} (attempt 4 - with jit.save)")
        # Save policy network using jit
        scripted_policy = torch.jit.script(model.policy_net)
        torch.jit.save(scripted_policy, f"{path}.policy.jit")
        # Save target network using jit
        scripted_target = torch.jit.script(model.target_net)
        torch.jit.save(scripted_target, f"{path}.target.jit")
        # Save epsilon value separately
        with open(f"{path}.epsilon.txt", "w") as f:
            f.write(str(model.epsilon))
        logger.info("Successfully saved model components with jit.save")
        return True
    except Exception as e:
        logger.error(f"All save attempts failed: {e}")
        return False

async def fetch_and_update_data(exchange, env, symbol, timeframe):
    """
    Fetch new candle data and update the environment

    Args:
        exchange: CCXT exchange instance
        env: Trading environment instance
        symbol: Trading pair symbol
        timeframe: Timeframe for the candles
    """
    logger.info(f"Fetching new data for {symbol} on {timeframe} timeframe")

    try:
        # Fetch up to 1000 candles
        limit = 1000

        # Fetch OHLCV data
        candles = await exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

        if not candles or len(candles) == 0:
            logger.warning(f"No candles returned for {symbol} on {timeframe}")
            return False

        logger.info(f"Successfully fetched {len(candles)} candles")

        # Convert to the format expected by the environment
        formatted_candles = []
        for candle in candles:
            timestamp, open_price, high, low, close, volume = candle
            formatted_candles.append({
                'timestamp': timestamp,
                'open': open_price,
                'high': high,
                'low': low,
                'close': close,
                'volume': volume
            })

        # Update environment data
        env.data = formatted_candles
        if hasattr(env, '_initialize_features'):
            env._initialize_features()

        logger.info(f"Updated environment with {len(formatted_candles)} candles")

        # Print latest candle info
        if formatted_candles:
            latest = formatted_candles[-1]
            dt = datetime.datetime.fromtimestamp(latest['timestamp']/1000).strftime('%Y-%m-%d %H:%M:%S')
            logger.info(f"Latest candle: Time={dt}, Open={latest['open']}, High={latest['high']}, Low={latest['low']}, Close={latest['close']}, Volume={latest['volume']}")

        return True

    except Exception as e:
        logger.error(f"Error fetching candle data: {e}")
        logger.error(traceback.format_exc())
        return False

async def live_training(
    symbol="ETH/USDT",
    timeframe="1m",
    model_path="models/trading_agent_best_pnl.pt",
    save_path="models/trading_agent_live_trained.pt",
    initial_balance=1000,
    update_interval=60,
    training_iterations=100,
    learning_rate=0.0001,
    batch_size=64,
    gamma=0.99,
    window_size=30,
    max_episodes=0,  # 0 means unlimited
):
    """
    Live training function that uses real market data to improve the model without executing real trades.

    Args:
        symbol: Trading pair symbol
        timeframe: Timeframe for training
        model_path: Path to the initial model to load
        save_path: Path to save the improved model
        initial_balance: Initial balance for simulation
        update_interval: Interval between data updates, in seconds
        training_iterations: Number of training iterations per data update
        learning_rate: Learning rate for training
        batch_size: Batch size for training
        gamma: Discount factor for training
        window_size: Window size for the environment
        max_episodes: Maximum number of episodes (0 for unlimited)
    """
    logger.info(f"Starting live training for {symbol} on {timeframe} timeframe")

    # Initialize these up front so the finally block can always reference them
    exchange = None
    agent = None
    writer = None
    try:
        # Initialize exchange (without sandbox mode)
        exchange = await initialize_exchange()
        logger.info(f"Exchange initialized: {exchange.id}")

        # Initialize environment
        env = TradingEnvironment(
            initial_balance=initial_balance,
            window_size=window_size,
            symbol=symbol,
            timeframe=timeframe,
        )

        # Fetch initial data
        logger.info(f"Fetching initial data for {symbol}")
        success = await fetch_and_update_data(exchange, env, symbol, timeframe)
        if not success:
            logger.error("Failed to fetch initial data, exiting")
            return

        # Initialize agent
        STATE_SIZE = env.get_state().shape[0] if hasattr(env, 'get_state') else 64
        ACTION_SIZE = env.action_space.n if hasattr(env.action_space, 'n') else 4
        agent = Agent(state_size=STATE_SIZE, action_size=ACTION_SIZE, hidden_size=384)

        # Load model if provided
        if os.path.exists(model_path):
            try:
                agent.load(model_path)
                logger.info(f"Model loaded successfully from {model_path}")
            except Exception as e:
                logger.warning(f"Error loading model: {e}")
                logger.info("Starting with a new model")
        else:
            logger.warning(f"Model file {model_path} not found. Starting with a new model.")

        # Initialize TensorBoard writer
        run_id = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        writer = SummaryWriter(log_dir=f"runs/live_training_{run_id}")
        agent.writer = writer

        # Initialize training statistics
        total_rewards = 0
        episode_count = 0
        best_reward = float('-inf')
        best_pnl = float('-inf')

        # Start live training loop
        logger.info("Starting live training loop")

        step_counter = 0
        last_update_time = datetime.datetime.now()

        while True:
            # Check if we've reached the maximum number of episodes
            if max_episodes > 0 and episode_count >= max_episodes:
                logger.info(f"Reached maximum episodes ({max_episodes}), stopping")
                break

            # Check if it's time to update data
            current_time = datetime.datetime.now()
            time_diff = (current_time - last_update_time).total_seconds()

            if time_diff >= update_interval:
                logger.info(f"Updating market data after {time_diff:.1f} seconds")
                success = await fetch_and_update_data(exchange, env, symbol, timeframe)
                if not success:
                    logger.warning("Failed to update data, will try again later")
                    # Wait a bit before trying again
                    await asyncio.sleep(5)
                    continue

                last_update_time = current_time

            # Run training iterations on the updated data
            episode_reward = 0
            env.reset()
            done = False

            # Run one simulated episode with the current data
            steps_in_episode = 0
            max_steps = len(env.data) - env.window_size - 1

            logger.info(f"Starting episode {episode_count + 1} with {max_steps} steps")

            while not done and steps_in_episode < max_steps:
                try:
                    state = env.get_state()
                    action = agent.select_action(state, training=True)

                    try:
                        next_state, reward, done, info = env.step(action)
                    except ValueError as e:
                        logger.error(f"Error during env.step: {e}")
                        # env.step may return 3 values instead of 4; handle that case
                        # ("values to unpack" matches both the too-many and not-enough messages)
                        if "values to unpack" in str(e):
                            logger.info("Trying alternative step format")
                            result = env.step(action)
                            if len(result) == 3:
                                next_state, reward, done = result
                                info = {}
                            else:
                                raise
                        else:
                            raise

                    # Save experience in replay memory
                    agent.memory.push(state, action, reward, next_state, done)

                    # Move to the next state
                    state = next_state
                    episode_reward += reward
                    step_counter += 1
                    steps_in_episode += 1

                    # Log action and results every 50 steps
                    if steps_in_episode % 50 == 0:
                        logger.info(f"Step {steps_in_episode}/{max_steps} | Action: {action} | Reward: {reward:.2f} | Balance: ${env.balance:.2f}")

                    # Train the agent on a batch of experiences
                    if len(agent.memory) > batch_size:
                        agent.learn()

                        # Additional training iterations
                        if steps_in_episode % 10 == 0 and training_iterations > 1:
                            for _ in range(training_iterations - 1):
                                agent.learn()

                    if done:
                        logger.info(f"Episode done after {steps_in_episode} steps")
                        break

                except Exception as e:
                    logger.error(f"Error during episode step: {e}")
                    logger.error(traceback.format_exc())
                    break

            # Update training statistics
            episode_count += 1
            total_rewards += episode_reward
            avg_reward = total_rewards / episode_count

            # Track metrics
            writer.add_scalar('LiveTraining/Reward', episode_reward, episode_count)
            writer.add_scalar('LiveTraining/AvgReward', avg_reward, episode_count)
            writer.add_scalar('LiveTraining/Balance', env.balance, episode_count)
            writer.add_scalar('LiveTraining/PnL', env.total_pnl, episode_count)

            # Report progress
            logger.info(f"""
            Episode: {episode_count}
            Reward: {episode_reward:.2f}
            Avg Reward: {avg_reward:.2f}
            Balance: ${env.balance:.2f}
            PnL: ${env.total_pnl:.2f}
            Memory Size: {len(agent.memory)}
            Total Steps: {step_counter}
            """)

            # Save the model if it's the best so far (by reward or PnL)
            if episode_reward > best_reward:
                best_reward = episode_reward
                reward_model_path = f"models/trading_agent_best_reward_{run_id}.pt"
                if robust_save(agent, reward_model_path):
                    logger.info(f"New best reward model saved: {episode_reward:.2f} to {reward_model_path}")
                else:
                    logger.error("Failed to save best reward model")

            if env.total_pnl > best_pnl:
                best_pnl = env.total_pnl
                pnl_model_path = f"models/trading_agent_best_pnl_{run_id}.pt"
                if robust_save(agent, pnl_model_path):
                    logger.info(f"New best PnL model saved: ${env.total_pnl:.2f} to {pnl_model_path}")
                else:
                    logger.error("Failed to save best PnL model")

            # Regularly save the model
            if episode_count % 5 == 0:
                if robust_save(agent, save_path):
                    logger.info(f"Model checkpoint saved to {save_path}")
                else:
                    logger.error("Failed to save checkpoint")

            # Update target network periodically
            if episode_count % 5 == 0:
                try:
                    agent.update_target_network()
                    logger.info("Target network updated")
                except Exception as e:
                    logger.error(f"Error updating target network: {e}")

            # Sleep to avoid excessive API calls
            await asyncio.sleep(1)

    except asyncio.CancelledError:
        logger.info("Live training cancelled")
    except KeyboardInterrupt:
        logger.info("Live training stopped by user")
    except Exception as e:
        logger.error(f"Error in live training: {e}")
        logger.error(traceback.format_exc())
    finally:
        # Save final model (agent may be None if initialization failed)
        if agent is not None:
            if robust_save(agent, save_path):
                logger.info(f"Final model saved to {save_path}")
            else:
                logger.error("Failed to save final model")

        # Close TensorBoard writer
        if writer is not None:
            try:
                writer.close()
                logger.info("TensorBoard writer closed")
            except Exception as e:
                logger.error(f"Error closing TensorBoard writer: {e}")

        # Close exchange connection
        if exchange:
            try:
                await exchange.close()
                logger.info("Exchange connection closed")
            except Exception as e:
                logger.error(f"Error closing exchange connection: {e}")

        logger.info("Live training completed")

async def main():
    """Main function to parse arguments and start live training"""
    parser = argparse.ArgumentParser(description='Live Training with Real Market Data')
    parser.add_argument('--symbol', type=str, default='ETH/USDT', help='Trading pair symbol')
    parser.add_argument('--timeframe', type=str, default='1m', help='Timeframe for training')
    parser.add_argument('--model_path', type=str, default='models/trading_agent_best_pnl.pt', help='Path to initial model')
    parser.add_argument('--save_path', type=str, default='models/trading_agent_live_trained.pt', help='Path to save improved model')
    parser.add_argument('--initial_balance', type=float, default=1000, help='Initial balance for simulation')
    parser.add_argument('--update_interval', type=int, default=60, help='Interval to update data in seconds')
    parser.add_argument('--training_iterations', type=int, default=100, help='Training iterations per update')
    parser.add_argument('--max_episodes', type=int, default=0, help='Maximum number of episodes (0 for unlimited)')

    args = parser.parse_args()

    logger.info(f"Starting live training with {args.symbol} on {args.timeframe} timeframe")

    await live_training(
        symbol=args.symbol,
        timeframe=args.timeframe,
        model_path=args.model_path,
        save_path=args.save_path,
        initial_balance=args.initial_balance,
        update_interval=args.update_interval,
        training_iterations=args.training_iterations,
        max_episodes=args.max_episodes,
    )

# Override Agent's save method with our robust save function
def monkey_patch_agent_save():
    """Replace Agent's save method with our robust save approach"""
    original_save = Agent.save

    def patched_save(self, path):
        return robust_save(self, path)

    # Apply the patch
    Agent.save = patched_save
    logger.info("Monkey patched Agent.save with robust_save")

    # Return the original method in case we need to restore it
    return original_save

if __name__ == "__main__":
    try:
        print("Starting live training script")
        # Apply the monkey patch before running the main function
        original_save = monkey_patch_agent_save()
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Live training stopped by user")
    except Exception as e:
        logger.error(f"Error in main function: {e}")
        logger.error(traceback.format_exc())
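
A typical invocation, using the flags defined by the script's argparse above (values here are illustrative):

```bash
python live_training.py --symbol ETH/USDT --timeframe 1m \
    --update_interval 60 --training_iterations 100 --max_episodes 0
```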

crypto/gogo2/simplified_live_training.py (new file, 118 lines)
@@ -0,0 +1,118 @@
#!/usr/bin/env python
import asyncio
import logging
import sys
import platform
import ccxt.async_support as ccxt
import os
import datetime

# Fix for Windows asyncio issues with aiodns
if platform.system() == 'Windows':
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
        print("Using Windows SelectorEventLoopPolicy to fix aiodns issue")
    except Exception as e:
        print(f"Failed to set WindowsSelectorEventLoopPolicy: {e}")

# Setup direct console logging for immediate feedback
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

async def initialize_exchange():
    """Initialize the exchange with API credentials from environment variables"""
    exchange_id = 'mexc'
    try:
        # Get API credentials from environment variables
        api_key = os.getenv('MEXC_API_KEY', '')
        secret_key = os.getenv('MEXC_SECRET_KEY', '')

        # Initialize the exchange
        exchange_class = getattr(ccxt, exchange_id)
        exchange = exchange_class({
            'apiKey': api_key,
            'secret': secret_key,
            'enableRateLimit': True,
        })

        logger.info(f"Exchange initialized with standard CCXT: {exchange_id}")
        return exchange
    except Exception as e:
        logger.error(f"Error initializing exchange: {e}")
        raise

async def fetch_ohlcv_data(exchange, symbol, timeframe, limit=1000):
    """Fetch OHLCV data from the exchange"""
    logger.info(f"Fetching {limit} {timeframe} candles for {symbol} (attempt 1/3)")

    try:
        candles = await exchange.fetch_ohlcv(symbol, timeframe, limit=limit)
        if not candles or len(candles) == 0:
            logger.warning(f"No candles returned for {symbol} on {timeframe}")
            return None

        logger.info(f"Successfully fetched {len(candles)} candles")
        return candles
    except Exception as e:
        logger.error(f"Error fetching candle data: {e}")
        return None

async def main():
    """Main function to test live data fetching"""
    symbol = "ETH/USDT"
    timeframe = "1m"

    logger.info(f"Starting simplified live training test for {symbol} on {timeframe}")

    exchange = None  # defined up front so the finally block can always reference it
    try:
        # Initialize exchange
        exchange = await initialize_exchange()

        # Fetch data every 10 seconds
        for i in range(5):
            logger.info(f"Fetch attempt {i+1}/5")
            candles = await fetch_ohlcv_data(exchange, symbol, timeframe)

            if candles:
                # Print the latest candle
                latest = candles[-1]
                timestamp, open_price, high, low, close, volume = latest
                dt = datetime.datetime.fromtimestamp(timestamp/1000).strftime('%Y-%m-%d %H:%M:%S')
                logger.info(f"Latest candle: Time={dt}, Open={open_price}, High={high}, Low={low}, Close={close}, Volume={volume}")

            # Wait 10 seconds before next fetch
            if i < 4:  # Don't wait after the last fetch
                logger.info("Waiting 10 seconds before next fetch...")
                await asyncio.sleep(10)

    except Exception as e:
        logger.error(f"Error in simplified live training test: {e}")
        import traceback
        logger.error(traceback.format_exc())
    finally:
        # Close exchange connection (guarded in case initialization failed)
        if exchange is not None:
            try:
                await exchange.close()
                logger.info("Exchange connection closed")
            except Exception:
                pass
        logger.info("Test completed")

if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Test stopped by user")
    except Exception as e:
        logger.error(f"Error in main function: {e}")
        import traceback
        logger.error(traceback.format_exc())
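
The script reads MEXC credentials via `os.getenv`; one way to provide them before running (variable names as used in the script, values are placeholders):

```bash
export MEXC_API_KEY="your_api_key"
export MEXC_SECRET_KEY="your_secret_key"
python simplified_live_training.py
```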

crypto/gogo2/test_save.py (new file, 182 lines)
@@ -0,0 +1,182 @@
#!/usr/bin/env python
import torch
import torch.nn as nn
import os
import logging
import sys
import platform

# Fix for Windows asyncio issues with aiodns
if platform.system() == 'Windows':
    try:
        import asyncio
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
        print("Using Windows SelectorEventLoopPolicy to fix aiodns issue")
    except Exception as e:
        print(f"Failed to set WindowsSelectorEventLoopPolicy: {e}")

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("test_save.log"),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

# Define a simple model for testing
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(10, 50)
        self.fc2 = nn.Linear(50, 20)
        self.fc3 = nn.Linear(20, 5)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Create a simple Agent class for testing
class TestAgent:
    def __init__(self):
        self.policy_net = SimpleModel()
        self.target_net = SimpleModel()
        self.optimizer = torch.optim.Adam(self.policy_net.parameters(), lr=0.001)
        self.epsilon = 0.1

    def save(self, path):
        """Standard save method that might fail"""
        checkpoint = {
            'policy_net': self.policy_net.state_dict(),
            'target_net': self.target_net.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'epsilon': self.epsilon
        }
        torch.save(checkpoint, path)
        logger.info(f"Model saved to {path}")

# Robust save function with multiple fallback approaches
def robust_save(model, path):
    """
    Robust model saving with multiple fallback approaches

    Args:
        model: The Agent model to save
        path: Path to save the model

    Returns:
        bool: True if successful, False otherwise
    """
    # Create directory if it doesn't exist
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

    # Backup path in case the main save fails
    backup_path = f"{path}.backup"

    # Attempt 1: Try with default settings in a separate file first
    try:
        logger.info(f"Saving model to {backup_path} (attempt 1)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'optimizer': model.optimizer.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, backup_path)
        logger.info(f"Successfully saved to {backup_path}")

        # If the backup worked, copy it to the actual path
        if os.path.exists(backup_path):
            import shutil
            shutil.copy(backup_path, path)
            logger.info(f"Copied backup to {path}")
            return True
    except Exception as e:
        logger.warning(f"First save attempt failed: {e}")

    # Attempt 2: Try with pickle protocol 2 (more broadly compatible)
    try:
        logger.info(f"Saving model to {path} (attempt 2 - pickle protocol 2)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'optimizer': model.optimizer.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, path, pickle_protocol=2)
        logger.info(f"Successfully saved to {path} with pickle_protocol=2")
        return True
    except Exception as e:
        logger.warning(f"Second save attempt failed: {e}")

    # Attempt 3: Try without optimizer state (which can be large and cause issues)
    try:
        logger.info(f"Saving model to {path} (attempt 3 - without optimizer)")
        checkpoint = {
            'policy_net': model.policy_net.state_dict(),
            'target_net': model.target_net.state_dict(),
            'epsilon': model.epsilon
        }
        torch.save(checkpoint, path)
        logger.info(f"Successfully saved to {path} without optimizer state")
        return True
    except Exception as e:
        logger.warning(f"Third save attempt failed: {e}")

    # Attempt 4: Try with torch.jit.save instead
    try:
        logger.info(f"Saving model to {path} (attempt 4 - with jit.save)")
        # Save policy network using jit
        scripted_policy = torch.jit.script(model.policy_net)
        torch.jit.save(scripted_policy, f"{path}.policy.jit")
        # Save target network using jit
        scripted_target = torch.jit.script(model.target_net)
        torch.jit.save(scripted_target, f"{path}.target.jit")
        # Save epsilon value separately
        with open(f"{path}.epsilon.txt", "w") as f:
            f.write(str(model.epsilon))
        logger.info("Successfully saved model components with jit.save")
        return True
    except Exception as e:
        logger.error(f"All save attempts failed: {e}")
        return False

def main():
    # Create a test directory
    save_dir = "test_models"
    os.makedirs(save_dir, exist_ok=True)

    # Create a test agent
    agent = TestAgent()

    # Test the regular save method (might fail)
    try:
        logger.info("Testing regular save method...")
        save_path = os.path.join(save_dir, "regular_save.pt")
        agent.save(save_path)
        logger.info("Regular save succeeded")
    except Exception as e:
        logger.error(f"Regular save failed: {e}")

    # Test our robust save method
    logger.info("Testing robust save method...")
    save_path = os.path.join(save_dir, "robust_save.pt")
    success = robust_save(agent, save_path)

    if success:
        logger.info("Robust save succeeded!")
    else:
        logger.error("Robust save failed!")

    # Check which files were created
    logger.info("Files created:")
    for file in os.listdir(save_dir):
        file_path = os.path.join(save_dir, file)
        file_size = os.path.getsize(file_path)
        logger.info(f"  - {file} ({file_size} bytes)")

if __name__ == "__main__":
    main()