fix checkpoint model loading

2025-02-02 21:04:23 +02:00
parent 79c51c0d5d
commit 0ebf4e13bd
2 changed files with 60 additions and 51 deletions
--- a/crypto/brian/candles_cache.json
+++ b/crypto/brian/candles_cache.json
--- a/crypto/brian/index.py
+++ b/crypto/brian/index.py
@@ -47,7 +47,7 @@ def save_candles_cache(filename, candles_dict):
        print("Error saving cache file:", e)
 # -------------------------------------
-# Checkpoint Functions (same as before)
+# Checkpoint Functions
 # -------------------------------------
 def maintain_checkpoint_directory(directory, max_files=10):
    files = os.listdir(directory)
@@ -100,6 +100,8 @@ def save_checkpoint(model, epoch, reward, last_dir=LAST_DIR, best_dir=BEST_DIR):
    print(f"Saved checkpoint for epoch {epoch} with reward {reward:.4f}")
 def load_best_checkpoint(model, best_dir=BEST_DIR):
    """Attempt to load the best checkpoint. If the architecture is different,
    catch the RuntimeError and skip loading."""
    best_models = get_best_models(best_dir)
    if not best_models:
        return None
@@ -107,7 +109,13 @@ def load_best_checkpoint(model, best_dir=BEST_DIR):
    path = os.path.join(best_dir, best_file)
    print(f"Loading best model from checkpoint: {best_file} with reward {best_reward:.4f}")
    checkpoint = torch.load(path)
-    model.load_state_dict(checkpoint["model_state_dict"])
+    try:
        model.load_state_dict(checkpoint["model_state_dict"])
    except RuntimeError as e:
        print("Warning: Failed to load best checkpoint due to:")
        print(e)
        print("This is likely due to a change in model architecture. Skipping checkpoint load.")
        return None
    return checkpoint
 # -------------------------------------
@@ -283,7 +291,6 @@ class BacktestEnvironment:
        base_ts = base_candle["timestamp"]
        for tf in self.timeframes:
            candles_list = self.candles_dict[tf]
            # Get the candle from this timeframe that is closest to (and <=) base_ts.
            aligned_index, _ = get_aligned_candle_with_index(candles_list, base_ts)
            features = get_features_for_tf(candles_list, aligned_index, period=10)
            state_features.extend(features)
@@ -392,15 +399,15 @@ def train_on_historical_data(env, rl_agent, num_epochs=10, epsilon=0.1):
 # -------------------------------------
 async def main_backtest():
    symbol = 'BTC/USDT'
-    # Define timeframes: we'll use 5 different ones.
+    # Define timeframes: 5 different ones.
    timeframes = ["1m", "5m", "15m", "1h", "1d"]
    now = int(time.time() * 1000)
-    # Use the base timeframe period of 1500 candles. For 1m, that is 1500 minutes.
+    # For base timeframe 1m, get 1500 candles (1500 minutes)
    period_ms = 1500 * 60 * 1000
    since = now - period_ms
    end_time = now
-    # Initialize exchange using MEXC (or your preferred exchange).
+    # Initialize exchange using MEXC
    mexc_api_key = os.environ.get('MEXC_API_KEY', 'YOUR_API_KEY')
    mexc_api_secret = os.environ.get('MEXC_API_SECRET', 'YOUR_SECRET_KEY')
    exchange = ccxt.mexc({
@@ -409,58 +416,60 @@ async def main_backtest():
        'enableRateLimit': True,
    })
-    candles_dict = {}
+    try:
-    for tf in timeframes:
+        candles_dict = {}
-        print(f"Fetching historical data for timeframe {tf}...")
+        for tf in timeframes:
-        candles = await fetch_historical_data(exchange, symbol, tf, since, end_time, batch_size=500)
+            print(f"Fetching historical data for timeframe {tf}...")
-        candles_dict[tf] = candles
+            candles = await fetch_historical_data(exchange, symbol, tf, since, end_time, batch_size=500)
            candles_dict[tf] = candles
-    # Optionally, save the multi-timeframe cache.
+        # Optionally, save the multi-timeframe cache.
-    save_candles_cache(CACHE_FILE, candles_dict)
+        save_candles_cache(CACHE_FILE, candles_dict)
-    # Create the backtest environment using multi-timeframe data.
+        # Create the backtest environment using multi-timeframe data.
-    env = BacktestEnvironment(candles_dict, base_tf="1m", timeframes=timeframes)
+        env = BacktestEnvironment(candles_dict, base_tf="1m", timeframes=timeframes)
-    # Neural Network dimensions: each timeframe produces 7 features.
+        # Neural network dimensions: each timeframe produces 7 features.
-    input_dim = len(timeframes) * 7  # 7 features * 5 timeframes = 35.
+        input_dim = len(timeframes) * 7  # 7 features x 5 timeframes = 35.
-    hidden_dim = 128
+        hidden_dim = 128
-    output_dim = 3  # Actions: SELL, HOLD, BUY.
+        output_dim = 3  # Actions: SELL, HOLD, BUY.
-    model = TradingModel(input_dim, hidden_dim, output_dim)
+        model = TradingModel(input_dim, hidden_dim, output_dim)
-    optimizer = optim.Adam(model.parameters(), lr=1e-4)
+        optimizer = optim.Adam(model.parameters(), lr=1e-4)
-    replay_buffer = ReplayBuffer(capacity=10000)
+        replay_buffer = ReplayBuffer(capacity=10000)
-    rl_agent = ContinuousRLAgent(model, optimizer, replay_buffer, batch_size=32, gamma=0.99)
+        rl_agent = ContinuousRLAgent(model, optimizer, replay_buffer, batch_size=32, gamma=0.99)
-    # Load best checkpoint if available.
+        # Load best checkpoint if available. (In case of architecture change, it will be skipped.)
-    load_best_checkpoint(model, BEST_DIR)
+        load_best_checkpoint(model, BEST_DIR)
-    # Train the agent over the historical period.
+        # Train the agent over the historical period.
-    num_epochs = 10  # Adjust as needed.
+        num_epochs = 10  # Adjust as needed.
-    train_on_historical_data(env, rl_agent, num_epochs=num_epochs, epsilon=0.1)
+        train_on_historical_data(env, rl_agent, num_epochs=num_epochs, epsilon=0.1)
-    # Run a final simulation (without exploration) to record trade history.
+        # Run a final simulation (without exploration) to record trade history.
-    state = env.reset(clear_trade_history=True)
+        state = env.reset(clear_trade_history=True)
-    done = False
+        done = False
-    cumulative_reward = 0.0
+        cumulative_reward = 0.0
-    while not done:
+        while not done:
-        action = rl_agent.act(state, epsilon=0.0)
+            action = rl_agent.act(state, epsilon=0.0)
-        state, reward, next_state, done = env.step(action)
+            state, reward, next_state, done = env.step(action)
-        cumulative_reward += reward
+            cumulative_reward += reward
-        state = next_state
+            state = next_state
-    print("Final simulation cumulative profit:", cumulative_reward)
+        print("Final simulation cumulative profit:", cumulative_reward)
-    # Evaluate trade performance.
+        # Evaluate trade performance.
-    trades = env.trade_history
+        trades = env.trade_history
-    num_trades = len(trades)
+        num_trades = len(trades)
-    num_wins = sum(1 for trade in trades if trade["pnl"] > 0)
+        num_wins = sum(1 for trade in trades if trade["pnl"] > 0)
-    win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0.0
+        win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0.0
-    total_profit = sum(trade["pnl"] for trade in trades)
+        total_profit = sum(trade["pnl"] for trade in trades)
-    print(f"Total trades: {num_trades}, Wins: {num_wins}, Win rate: {win_rate:.2f}%, Total Profit: {total_profit:.4f}")
+        print(f"Total trades: {num_trades}, Wins: {num_wins}, Win rate: {win_rate:.2f}%, Total Profit: {total_profit:.4f}")
-    # Plot chart with buy/sell markers on the base timeframe ("1m").
+        # Plot chart with buy/sell markers on the base timeframe ("1m").
-    plot_trade_history(candles_dict["1m"], trades)
+        plot_trade_history(candles_dict["1m"], trades)
-    
+    finally:
-    await exchange.close()
+        # Ensure that exchange resources are released even if errors occur.
        await exchange.close()
 if __name__ == "__main__":
    load_dotenv()